From aef44e15571c91ab8a9416ab5f36a43672df7219 Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Sun, 28 Jun 2020 11:21:44 +0800 Subject: [PATCH] synchronize with latest Ascend software suite 28 Jun 2020 --- inc/external/ge/ge_api.h | 9 + inc/external/ge/ge_api_types.h | 22 +- inc/external/graph/types.h | 1 + inc/external/register/register_fmk_types.h | 11 +- inc/framework/common/debug/log.h | 4 +- inc/framework/common/ge_inner_error_codes.h | 2 + inc/framework/common/ge_types.h | 76 +- inc/framework/common/helper/om_file_helper.h | 5 +- inc/framework/common/types.h | 1 + inc/framework/executor/ge_executor.h | 10 + .../ge_runtime_dummy/davinci_model.h | 113 -- inc/framework/ge_runtime_dummy/model_runner.h | 58 - inc/framework/ge_runtime_dummy/op_info.h | 72 - inc/framework/ge_runtime_dummy/task_info.h | 394 ------ inc/framework/omg/omg.h | 6 + inc/framework/omg/omg_inner_types.h | 4 +- inc/graph/compute_graph.h | 3 +- inc/graph/debug/ge_attr_define.h | 4 + inc/graph/ge_tensor.h | 3 + inc/graph/utils/graph_utils.h | 43 +- inc/graph/utils/type_utils.h | 2 + src/common/graph/compute_graph.cc | 150 +-- src/common/graph/format_refiner.cc | 2 +- src/common/graph/ge_attr_define.cc | 6 +- src/common/graph/ge_attr_value.cc | 5 + src/common/graph/ge_tensor.cc | 18 + src/common/graph/graph.mk | 182 +++ src/common/graph/model_serialize.cc | 39 + src/common/graph/module.mk | 3 + src/common/graph/tensor.cc | 2 + src/common/graph/utils/graph_utils.cc | 36 +- src/common/graph/utils/op_desc_utils.cc | 1 - src/common/graph/utils/tensor_utils.cc | 2 + src/common/graph/utils/type_utils.cc | 22 +- src/ge/CMakeLists.txt | 21 +- src/ge/client/ge_api.cc | 29 +- src/ge/client/module.mk | 111 ++ src/ge/common/auth/file_saver.cc | 3 +- src/ge/common/ge/plugin_manager.cc | 44 +- src/ge/common/ge/tbe_plugin_manager.cc | 293 ++++ src/ge/common/ge/tbe_plugin_manager.h | 73 + src/ge/common/ge_common.mk | 241 ++++ src/ge/common/helper/model_cache_helper.cc | 6 +- src/ge/common/helper/model_helper.cc 
| 2 +- src/ge/common/helper/om_file_helper.cc | 12 +- src/ge/common/model_saver.cc | 10 +- src/ge/common/module.mk | 3 + src/ge/common/op/ge_op_utils.cc | 2 + src/ge/common/profiling/profiling_manager.cc | 24 +- src/ge/common/types.cc | 1 + src/ge/common/util.cc | 24 +- src/ge/engine_manager/dnnengine_manager.cc | 4 +- src/ge/executor/CMakeLists.txt | 1 + src/ge/executor/ge_executor.cc | 169 ++- src/ge/executor/module.mk | 202 +++ src/ge/ge_inference.mk | 407 ++++++ .../ge_local_engine/engine/host_cpu_engine.cc | 6 +- src/ge/ge_local_engine/module.mk | 59 + .../ge_local_ops_kernel_info.cc | 2 +- .../ops_kernel_store/op/no_op.cc | 2 +- src/ge/ge_runner.mk | 429 ++++++ src/ge/ge_train.mk | 333 +++++ src/ge/generator/ge_generator.cc | 97 +- .../graph/build/memory/block_mem_assigner.cc | 101 +- .../graph/build/memory/block_mem_assigner.h | 19 +- src/ge/graph/build/memory/module.mk | 98 ++ src/ge/graph/build/model_builder.cc | 37 +- src/ge/graph/build/stream_allocator.cc | 22 +- src/ge/graph/build/stream_allocator.h | 1 + src/ge/graph/build/task_generator.cc | 36 +- src/ge/graph/execute/graph_execute.cc | 71 + src/ge/graph/execute/graph_execute.h | 11 + src/ge/graph/label/while_label_maker.cc | 2 +- .../load/new_model_manager/aipp_utils.cc | 90 ++ .../graph/load/new_model_manager/aipp_utils.h | 48 + .../load/new_model_manager/data_dumper.cc | 20 +- .../load/new_model_manager/davinci_model.cc | 275 +++- .../load/new_model_manager/davinci_model.h | 28 + .../load/new_model_manager/model_manager.cc | 134 +- .../load/new_model_manager/model_manager.h | 28 +- .../task_info/hccl_task_info.cc | 1 + .../task_info/kernel_ex_task_info.cc | 33 +- .../task_info/kernel_ex_task_info.h | 2 + .../task_info/kernel_task_info.cc | 29 + .../task_info/kernel_task_info.h | 5 + .../super_kernel/super_kernel_factory.cc | 8 +- src/ge/graph/manager/graph_manager.cc | 67 +- src/ge/graph/manager/graph_manager.h | 3 +- src/ge/graph/manager/graph_manager_utils.cc | 1 + 
src/ge/graph/manager/graph_manager_utils.h | 8 +- src/ge/graph/manager/graph_var_manager.cc | 2 +- src/ge/graph/manager/graph_var_manager.h | 2 +- src/ge/graph/manager/util/rt_context_util.h | 6 +- src/ge/graph/optimize/graph_optimize.cc | 8 +- .../optimizer/allreduce_fusion_pass.cc | 397 ++++++ .../optimizer/allreduce_fusion_pass.h | 55 + .../partition/dynamic_shape_partition.cc | 5 +- src/ge/graph/partition/graph_partition.cc | 29 +- .../passes/aicpu_constant_folding_pass.cc | 4 +- src/ge/graph/passes/atomic_addr_clean_pass.cc | 7 +- src/ge/graph/passes/cast_remove_pass.cc | 4 +- src/ge/graph/passes/cond_pass.cc | 2 +- src/ge/graph/passes/cond_remove_pass.cc | 2 +- src/ge/graph/passes/enter_pass.cc | 3 - src/ge/graph/passes/flow_ctrl_pass.cc | 4 +- src/ge/graph/passes/for_pass.cc | 2 +- src/ge/graph/passes/hccl_memcpy_pass.cc | 2 +- src/ge/graph/passes/iterator_op_pass.cc | 22 +- src/ge/graph/passes/mark_agnostic_pass.cc | 40 + src/ge/graph/passes/mark_agnostic_pass.h | 29 + src/ge/graph/passes/net_output_pass.cc | 26 +- src/ge/graph/passes/net_output_pass.h | 10 + .../passes/no_use_reshape_remove_pass.cc | 2 +- src/ge/graph/passes/permute_pass.cc | 4 +- .../passes/replace_with_empty_const_pass.cc | 12 +- .../same_transdata_breadth_fusion_pass.cc | 9 +- src/ge/graph/passes/subgraph_pass.cc | 9 +- .../graph/passes/switch_data_edges_bypass.cc | 221 +++ .../graph/passes/switch_data_edges_bypass.h | 32 + .../passes/switch_dead_branch_elimination.cc | 2 +- src/ge/graph/passes/switch_op_pass.cc | 45 +- src/ge/graph/passes/switch_op_pass.h | 6 +- .../passes/transop_breadth_fusion_pass.cc | 8 + .../transop_nearby_allreduce_fusion_pass.cc | 2 +- .../transop_symmetry_elimination_pass.cc | 3 +- src/ge/graph/passes/unused_op_remove_pass.cc | 2 +- src/ge/graph/preprocess/graph_preprocess.cc | 203 ++- src/ge/graph/preprocess/graph_preprocess.h | 3 + .../graph/preprocess/insert_op/ge_aipp_op.cc | 52 +- .../insert_op/util_insert_aipp_op.cc | 158 ++- 
.../insert_op/util_insert_aipp_op.h | 5 + .../preprocess/multi_batch_copy_graph.cc | 88 +- .../graph/preprocess/multi_batch_copy_graph.h | 1 + src/ge/host_kernels/slice_kernel.cc | 33 +- src/ge/host_kernels/slice_kernel.h | 2 + src/ge/host_kernels/unsqueeze_kernel.cc | 70 + src/ge/host_kernels/unsqueeze_kernel.h | 32 + .../executor/hybrid_execution_context.h | 4 +- .../executor/hybrid_model_async_executor.cc | 2 +- .../hybrid/executor/hybrid_model_executor.cc | 1 - src/ge/hybrid/executor/rt_callback_manager.cc | 7 +- .../executor/worker/execution_engine.cc | 3 +- .../executor/worker/task_compile_engine.cc | 3 +- src/ge/hybrid/model/hybrid_model_builder.cc | 9 +- .../node_executor/aicpu/aicpu_ext_info.cc | 204 +++ .../node_executor/aicpu/aicpu_ext_info.h | 71 + .../aicpu/aicpu_node_executor.cc | 793 ++++++----- .../node_executor/aicpu/aicpu_node_executor.h | 121 +- .../compiledsubgraph/known_node_executor.cc | 1 + .../hostcpu/ge_local_node_executor.cc | 24 +- src/ge/hybrid/node_executor/node_executor.cc | 2 +- src/ge/hybrid/node_executor/task_context.cc | 6 +- src/ge/init/gelib.cc | 72 +- src/ge/init/gelib.h | 5 +- src/ge/ir_build/atc_ir_common.cc | 93 +- src/ge/ir_build/atc_ir_common.h | 11 +- src/ge/ir_build/ge_ir_build.cc | 98 +- src/ge/module.mk | 4 + src/ge/offline/main.cc | 1195 +++++++++++++++++ src/ge/offline/module.mk | 53 + src/ge/offline/single_op_parser.cc | 354 +++++ src/ge/offline/single_op_parser.h | 76 ++ src/ge/plugin/engine/module.mk | 59 + src/ge/session/inner_session.cc | 24 + src/ge/session/inner_session.h | 2 + src/ge/session/omg.cc | 909 +++++++++++++ src/ge/session/session_manager.cc | 18 + src/ge/session/session_manager.h | 10 + src/ge/single_op/single_op_model.cc | 2 +- src/ge/single_op/task/aicpu_task_builder.cc | 6 +- src/proto/task.proto | 4 + .../fwkacllib/inc/cce/aicpu_engine_struct.h | 6 +- .../fwkacllib/inc/cce/fwk_adpt_struct.h | 20 +- third_party/fwkacllib/inc/ops/aipp.h | 2 + third_party/fwkacllib/inc/ops/array_ops.h | 144 +- 
third_party/fwkacllib/inc/ops/audio_ops.h | 12 + third_party/fwkacllib/inc/ops/batch_ops.h | 9 + third_party/fwkacllib/inc/ops/bitwise_ops.h | 3 + .../fwkacllib/inc/ops/boosted_trees_ops.h | 3 + .../inc/ops/candidate_sampling_ops.h | 21 + .../fwkacllib/inc/ops/control_flow_ops.h | 24 + third_party/fwkacllib/inc/ops/ctc_ops.h | 2 + third_party/fwkacllib/inc/ops/data_flow_ops.h | 147 ++ .../inc/ops/elewise_calculation_ops.h | 718 ++++++++-- third_party/fwkacllib/inc/ops/image_ops.h | 81 ++ third_party/fwkacllib/inc/ops/linalg_ops.h | 28 + third_party/fwkacllib/inc/ops/logging_ops.h | 7 + third_party/fwkacllib/inc/ops/lookup_ops.h | 20 + third_party/fwkacllib/inc/ops/math_ops.h | 93 +- .../inc/ops/matrix_calculation_ops.h | 265 ++-- .../fwkacllib/inc/ops/nn_batch_norm_ops.h | 49 +- .../fwkacllib/inc/ops/nn_calculation_ops.h | 139 +- third_party/fwkacllib/inc/ops/nn_detect_ops.h | 167 ++- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 86 +- .../fwkacllib/inc/ops/nn_pooling_ops.h | 142 +- .../fwkacllib/inc/ops/nn_training_ops.h | 245 +++- third_party/fwkacllib/inc/ops/no_op.h | 3 + .../fwkacllib/inc/ops/nonlinear_fuc_ops.h | 82 +- third_party/fwkacllib/inc/ops/pad_ops.h | 76 +- third_party/fwkacllib/inc/ops/parsing_ops.h | 2 + third_party/fwkacllib/inc/ops/quantize_ops.h | 7 + .../fwkacllib/inc/ops/ragged_array_ops.h | 2 + .../fwkacllib/inc/ops/ragged_conversion_ops.h | 2 + .../fwkacllib/inc/ops/ragged_math_ops.h | 2 + third_party/fwkacllib/inc/ops/random_ops.h | 54 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 138 +- third_party/fwkacllib/inc/ops/sdca_ops.h | 2 + third_party/fwkacllib/inc/ops/selection_ops.h | 320 +++-- third_party/fwkacllib/inc/ops/set_ops.h | 8 + third_party/fwkacllib/inc/ops/sparse_ops.h | 63 + third_party/fwkacllib/inc/ops/spectral_ops.h | 2 + .../fwkacllib/inc/ops/split_combination_ops.h | 49 +- third_party/fwkacllib/inc/ops/state_ops.h | 16 + .../fwkacllib/inc/ops/stateful_random_ops.h | 16 + .../fwkacllib/inc/ops/stateless_random_ops.h | 4 + 
third_party/fwkacllib/inc/ops/string_ops.h | 35 + .../fwkacllib/inc/ops/transformation_ops.h | 112 +- .../fwkacllib/inc/register/op_registry.h | 2 + third_party/fwkacllib/inc/runtime/base.h | 16 + third_party/fwkacllib/inc/runtime/mem.h | 1 + third_party/fwkacllib/inc/tdt/data_common.h | 2 +- 221 files changed, 12009 insertions(+), 2450 deletions(-) delete mode 100644 inc/framework/ge_runtime_dummy/davinci_model.h delete mode 100644 inc/framework/ge_runtime_dummy/model_runner.h delete mode 100644 inc/framework/ge_runtime_dummy/op_info.h delete mode 100644 inc/framework/ge_runtime_dummy/task_info.h create mode 100644 src/common/graph/graph.mk create mode 100644 src/common/graph/module.mk create mode 100644 src/ge/client/module.mk create mode 100644 src/ge/common/ge/tbe_plugin_manager.cc create mode 100644 src/ge/common/ge/tbe_plugin_manager.h create mode 100644 src/ge/common/ge_common.mk create mode 100644 src/ge/common/module.mk create mode 100644 src/ge/executor/module.mk create mode 100644 src/ge/ge_inference.mk create mode 100644 src/ge/ge_local_engine/module.mk create mode 100644 src/ge/ge_runner.mk create mode 100644 src/ge/ge_train.mk create mode 100644 src/ge/graph/build/memory/module.mk create mode 100644 src/ge/graph/load/new_model_manager/aipp_utils.cc create mode 100644 src/ge/graph/load/new_model_manager/aipp_utils.h create mode 100644 src/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc create mode 100644 src/ge/graph/optimize/optimizer/allreduce_fusion_pass.h create mode 100644 src/ge/graph/passes/mark_agnostic_pass.cc create mode 100644 src/ge/graph/passes/mark_agnostic_pass.h create mode 100644 src/ge/graph/passes/switch_data_edges_bypass.cc create mode 100644 src/ge/graph/passes/switch_data_edges_bypass.h create mode 100644 src/ge/host_kernels/unsqueeze_kernel.cc create mode 100644 src/ge/host_kernels/unsqueeze_kernel.h create mode 100644 src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc create mode 100644 
src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h create mode 100644 src/ge/module.mk create mode 100644 src/ge/offline/main.cc create mode 100644 src/ge/offline/module.mk create mode 100644 src/ge/offline/single_op_parser.cc create mode 100644 src/ge/offline/single_op_parser.h create mode 100644 src/ge/plugin/engine/module.mk create mode 100644 src/ge/session/omg.cc diff --git a/inc/external/ge/ge_api.h b/inc/external/ge/ge_api.h index f3e9fcb6..08156539 100644 --- a/inc/external/ge/ge_api.h +++ b/inc/external/ge/ge_api.h @@ -77,6 +77,15 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { /// Status RunGraph(uint32_t graphId, const std::vector &inputs, std::vector &outputs); + /// + /// @ingroup ge_graph + /// @brief build graph in the session with specific session id + /// @param [in] graphId: graph id + /// @param [in] inputs: input data + /// @return Status result of function + /// + Status BuildGraph(uint32_t graphId, const std::vector &inputs); + /// /// @ingroup ge_graph /// @brief run graph in the session with specific session id asynchronously diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 09561212..5a8482e7 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -157,6 +157,9 @@ const std::string OUTPUT_DATATYPE = "ge.outputDatatype"; // congigure opSelectImplmode to setting op select implmode const std::string OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; +// congigure optypelist_for_implmode to setting which op use implmode +const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; + // configure whether to enable hcom parallel by session constructor options param, // its value should be "0" or "1", default value is "0" const std::string HCOM_PARALLEL = "ge.hcomParallel"; @@ -258,12 +261,12 @@ using RunAsyncCallback = std::function ir_builder_suppported_options = { - INPUT_FORMAT, INPUT_SHAPE, DYNAMIC_BATCH_SIZE, DYNAMIC_IMAGE_SIZE, - INSERT_OP_FILE, OUTPUT_TYPE, 
BUFFER_OPTIMIZE, ENABLE_COMPRESS_WEIGHT, - COMPRESS_WEIGHT_CONF, OUT_NODES, INPUT_FP16_NODES, LOG_LEVEL}; + INPUT_FORMAT, INPUT_SHAPE, OP_NAME_MAP, DYNAMIC_BATCH_SIZE, + DYNAMIC_IMAGE_SIZE, INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, + AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES, + LOG_LEVEL}; // for interface: aclgrphBuildInitialize -const std::set global_options = {HEAD_STREAM, - CORE_TYPE, +const std::set global_options = {CORE_TYPE, SOC_VERSION, + BUFFER_OPTIMIZE, + ENABLE_COMPRESS_WEIGHT, + COMPRESS_WEIGHT_CONF, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, AUTO_TUNE_MODE, @@ -298,7 +305,8 @@ const std::set global_options = {HEAD_STREAM, FUSION_SWITCH_FILE, ENABLE_SMALL_CHANNEL, QUANT_OPTIMIZE, - OP_SELECT_IMPL_MODE}; + OP_SELECT_IMPL_MODE, + OPTYPELIST_FOR_IMPLMODE}; } // namespace ir_option } // namespace ge diff --git a/inc/external/graph/types.h b/inc/external/graph/types.h index 6a8362ba..4cd9ba91 100644 --- a/inc/external/graph/types.h +++ b/inc/external/graph/types.h @@ -143,6 +143,7 @@ enum Format { FORMAT_DHWNC, FORMAT_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format FORMAT_FRACTAL_ZN_LSTM, + FORMAT_FRACTAL_Z_G, FORMAT_RESERVED, FORMAT_ALL }; diff --git a/inc/external/register/register_fmk_types.h b/inc/external/register/register_fmk_types.h index ef469f3a..97616060 100644 --- a/inc/external/register/register_fmk_types.h +++ b/inc/external/register/register_fmk_types.h @@ -25,11 +25,12 @@ namespace domi { /// @brief AI framework types /// enum FrameworkType { - FMK_TYPE_C = 0, - FMK_TYPE_MINDSPORE = 1, - FMK_TYPE_T = 3, - FMK_TYPE_A_NN, - FMK_TYPE_RESERVED, + CAFFE = 0, + MINDSPORE = 1, + TENSORFLOW = 3, + ANDROID_NN, + ONNX, + FRAMEWORK_RESERVED, }; } // namespace domi diff --git a/inc/framework/common/debug/log.h b/inc/framework/common/debug/log.h index 147c3bdf..28c6585e 100644 --- a/inc/framework/common/debug/log.h +++ b/inc/framework/common/debug/log.h @@ -231,7 +231,7 @@ using cce::ccStatus_t; 
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \ exec_expr; \ } \ - } + }; // If expr is not RT_ERROR_NONE, print the log and return #define GE_CHK_RT_RET(expr) \ @@ -259,7 +259,7 @@ using cce::ccStatus_t; if (expr) { \ exec_expr; \ } \ - } + }; // If make_shared is abnormal, print the log and execute the statement #define GE_MAKE_SHARED(exec_expr0, exec_expr1) \ diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index 4b5538d3..c4a36597 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -280,6 +280,8 @@ GE_ERRORNO_RUNTIME(GE_RTI_CALL_HCCL_REDUCE_SCATTER_FAILED, 47, "call hccl hcom r // Executor module error code definition GE_ERRORNO_EXECUTOR(GE_EXEC_NOT_INIT, 1, "GE Executor is not yet initialized."); +GE_ERRORNO_EXECUTOR(GE_AIPP_NOT_EXIST, 2, "GE AIPP is not exist."); +GE_ERRORNO_EXECUTOR(GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY, 3, "GE Dynamic AIPP is not support to query temporarily."); // Generator module error code definition GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_INIT_FAILED, 1, "Graph manager initialize failed."); diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index ae83e40d..bcc90d25 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -33,11 +33,11 @@ enum RuntimeType { HOST = 0, DEVICE = 1 }; enum PerfLevel { GEN_TASK_WITH_FUSION = -1, GEN_TASK_WITHOUT_L2FUSION = 3, GEN_TASK_WITHOUT_FUSION = 4 }; enum FrameworkType { - FMK_TYPE_C = 0, - FMK_TYPE_MINDSPORE = 1, - FMK_TYPE_T = 3, - FMK_TYPE_A_NN, - FMK_TYPE_RESERVED, + CAFFE = 0, + MINDSPORE = 1, + TENSORFLOW = 3, + ANDROID_NN, + FRAMEWORK_RESERVED, }; enum OpEngineType { @@ -111,6 +111,72 @@ struct InputOutputDescInfo { ShapeDescription shape_info; }; +// Definition of model io dims +struct InputOutputDims { + std::string name; + size_t dim_num; + uint32_t size; + std::vector dims; +}; + +// Definition of model io dims 
+struct OriginInputInfo { + Format format; + DataType data_type; + uint32_t dim_num; +}; + +// The structure of AIPP info +struct AippConfigInfo { + int8_t input_format; + int32_t src_image_size_w; + int32_t src_image_size_h; + int8_t crop; + int32_t load_start_pos_w; + int32_t load_start_pos_h; + int32_t crop_size_w; + int32_t crop_size_h; + int8_t resize; + int32_t resize_output_w; + int32_t resize_output_h; + int8_t padding; + int32_t left_padding_size; + int32_t right_padding_size; + int32_t top_padding_size; + int32_t bottom_padding_size; + int8_t csc_switch; + int8_t rbuv_swap_switch; + int8_t ax_swap_switch; + int8_t single_line_mode; + int32_t matrix_r0c0; + int32_t matrix_r0c1; + int32_t matrix_r0c2; + int32_t matrix_r1c0; + int32_t matrix_r1c1; + int32_t matrix_r1c2; + int32_t matrix_r2c0; + int32_t matrix_r2c1; + int32_t matrix_r2c2; + int32_t output_bias_0; + int32_t output_bias_1; + int32_t output_bias_2; + int32_t input_bias_0; + int32_t input_bias_1; + int32_t input_bias_2; + int32_t mean_chn_0; + int32_t mean_chn_1; + int32_t mean_chn_2; + int32_t mean_chn_3; + float min_chn_0; + float min_chn_1; + float min_chn_2; + float min_chn_3; + float var_reci_chn_0; + float var_reci_chn_1; + float var_reci_chn_2; + float var_reci_chn_3; +}; + // The structure of offline Modeldata struct ModelData { void *model_data = nullptr; // Model binary data start addr diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index 1e4cee9b..fec7e294 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -59,15 +59,14 @@ class OmFileLoadHelper { Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); + OmFileContext context_; + private: Status CheckModelValid(const ge::ModelData &model) const; Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); bool is_inited_{false}; - - public: - OmFileContext context_; }; class 
OmFileSaveHelper { diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h index fe5cca62..e3844a61 100644 --- a/inc/framework/common/types.h +++ b/inc/framework/common/types.h @@ -160,6 +160,7 @@ REGISTER_OPTYPE_DECLARE(SLICE, "Slice"); REGISTER_OPTYPE_DECLARE(SLICED, "SliceD"); REGISTER_OPTYPE_DECLARE(FLOORDIV, "FloorDiv"); REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze"); +REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze"); REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); REGISTER_OPTYPE_DECLARE(RANGE, "Range"); REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 96f204b2..87e30805 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -96,6 +96,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { /// ge::Status GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info); + ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info); + /// /// @ingroup ge /// @brief Set dynamic image info @@ -110,6 +112,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { const std::vector &aippBatchPara, const kAippDynamicPara &aippParms); + ge::Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, std::vector &output_desc); @@ -206,6 +211,11 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { static ge::Status ReleaseSingleOpResource(void *stream); + ge::Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); + ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); + ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, + std::vector &output_dims); + private: 
static bool isInit_; }; diff --git a/inc/framework/ge_runtime_dummy/davinci_model.h b/inc/framework/ge_runtime_dummy/davinci_model.h deleted file mode 100644 index 91e70159..00000000 --- a/inc/framework/ge_runtime_dummy/davinci_model.h +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ -#define INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ - -#include -#include - -#include "ge_runtime/op_info.h" -#include "ge_runtime/task_info.h" - -namespace ge { -namespace model_runner { -class DavinciModel { - public: - DavinciModel(const std::vector> &task_info_list, - const std::vector> &data_info_list, - const std::vector> &output_info_list, - const std::vector> &constant_info_list, - const std::vector &variable_info_list, - const std::vector &wait_active_stream_list, - const std::vector &force_copy_stream_list, uint64_t mem_size = 0, uint64_t weight_size = 0, - uint64_t var_size = 0, uintptr_t logic_mem_base = 0, uintptr_t logic_weight_base = 0, - uintptr_t logic_var_base = 0, uint32_t stream_num = 0, uint32_t batch_num = 0, uint32_t event_num = 0, - int32_t priority = 0) - : task_info_list_(task_info_list), - data_info_list_(data_info_list), - output_info_list_(output_info_list), - constant_info_list_(constant_info_list), - variable_info_list_(variable_info_list), - wait_active_stream_list_(wait_active_stream_list), - 
force_copy_stream_list_(force_copy_stream_list), - mem_size_(mem_size), - weight_size_(weight_size), - var_size_(var_size), - logic_mem_base_(logic_mem_base), - logic_weight_base_(logic_weight_base), - logic_var_base_(logic_var_base), - stream_num_(stream_num), - batch_num_(batch_num), - event_num_(event_num), - priority_(priority) {} - ~DavinciModel() {} - - uint64_t GetMemSize() const { return mem_size_; } - uint64_t GetWeightSize() const { return weight_size_; } - uint64_t GetVarSize() const { return var_size_; } - - uintptr_t GetLogicMemBase() const { return logic_mem_base_; } - uintptr_t GetLogicWeightBase() const { return logic_weight_base_; } - uintptr_t GetLogicVarBase() const { return logic_var_base_; } - - uint32_t GetStreamNum() const { return stream_num_; } - uint32_t GetBatchNum() const { return batch_num_; } - uint32_t GetEventNum() const { return event_num_; } - - const std::vector &GetWaitActiveStreams() const { return wait_active_stream_list_; } - const std::vector &GetForceCopyStreams() const { return force_copy_stream_list_; } - - int32_t GetPriority() const { return priority_; } - - const std::vector> &GetTaskInfoList() const { return task_info_list_; } - const std::vector> &GetDataInfoList() const { return data_info_list_; } - const std::vector> &GetOutputInfoList() const { return output_info_list_; } - const std::vector> &GetConstantInfoList() const { return output_info_list_; } - const std::vector &GetVariableInfoList() const { return variable_info_list_; } - - private: - std::vector> task_info_list_; - std::vector> data_info_list_; - std::vector> output_info_list_; - std::vector> constant_info_list_; - std::vector variable_info_list_; - - std::vector wait_active_stream_list_; - std::vector force_copy_stream_list_; - - uint64_t mem_size_; - uint64_t weight_size_; - uint64_t var_size_; - - uintptr_t logic_mem_base_; - uintptr_t logic_weight_base_; - uintptr_t logic_var_base_; - - uint32_t stream_num_; - uint32_t batch_num_; - uint32_t 
event_num_; - - int32_t priority_; - - // Disable to copy constructor and assignment operator - DavinciModel &operator=(const DavinciModel &) = delete; - DavinciModel(const DavinciModel &) = delete; -}; -} // namespace model_runner -} // namespace ge - -#endif // INC_FRAMEWORK_GE_RUNTIME_DAVINCI_MODEL_H_ diff --git a/inc/framework/ge_runtime_dummy/model_runner.h b/inc/framework/ge_runtime_dummy/model_runner.h deleted file mode 100644 index 6e7abcb9..00000000 --- a/inc/framework/ge_runtime_dummy/model_runner.h +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ -#define INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ - -#include -#include -#include - -#include "common/ge_inner_error_codes.h" -#include "common/ge_types.h" -#include "ge_runtime/davinci_model.h" - -namespace ge { -namespace model_runner { -class RuntimeModel; - -class ModelRunner { - public: - static ModelRunner &Instance(); - - bool LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint32_t model_id, - std::shared_ptr davinci_model, std::shared_ptr listener); - - const std::vector &GetTaskIdList(uint32_t model_id) const; - - bool UnloadModel(uint32_t model_id); - - bool RunModel(uint32_t model_id, const InputData &input_data, OutputData *output_data); - - bool GetInputOutputDescInfo(uint32_t model_id, bool zero_copy, std::vector *input_desc, - std::vector *output_desc, std::vector *input_format, - std::vector *output_format); - - private: - ModelRunner() = default; - ~ModelRunner() = default; - - std::unordered_map> runtime_models_; -}; -} // namespace model_runner -} // namespace ge - -#endif // INC_FRAMEWORK_GE_RUNTIME_MODEL_RUNNER_H_ diff --git a/inc/framework/ge_runtime_dummy/op_info.h b/inc/framework/ge_runtime_dummy/op_info.h deleted file mode 100644 index 22c16ed6..00000000 --- a/inc/framework/ge_runtime_dummy/op_info.h +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ -#define INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ - -#include -#include -#include - -namespace ge { -namespace model_runner { -struct TensorInfo { - int64_t GetShapeSize() const { - int64_t res = 1; - if (dims.empty()) { - return 0; - } - for (auto dim : dims) { - res *= dim; - } - return res; - } - - int64_t GetDim(uint32_t index) { - if (index >= dims.size()) { - return 0; - } - return dims[index]; - } - - std::vector dims; - uint32_t datatype; - uint32_t format; - uint32_t real_dim_cnt; - uint32_t size; - bool is_output; -}; - -struct OpInfo { - uint32_t index; - std::string name; - std::string type; - bool var_is_broadcast; - std::vector input_addrs; - std::vector output_addrs; - std::vector input_tensors; - std::vector output_tensors; - std::vector weight_tensors; - std::vector src_name; - std::vector src_index; - std::string weight_data; -}; - -using TensorInfoPtr = std::shared_ptr; -using OpInfoPtr = std::shared_ptr; -} // namespace model_runner -} // namespace ge -#endif // INC_FRAMEWORK_GE_RUNTIME_OP_INFO_H_ diff --git a/inc/framework/ge_runtime_dummy/task_info.h b/inc/framework/ge_runtime_dummy/task_info.h deleted file mode 100644 index a48ed68b..00000000 --- a/inc/framework/ge_runtime_dummy/task_info.h +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ -#define INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ - -#include -#include -#include -#include -#include - -#include "cce/taskdown_api.h" - -namespace ge { -namespace model_runner { -enum TaskInfoType { - CCE = 0, - TBE, - AICPU, - LABEL_SET, - LABEL_SWITCH, - LABEL_GOTO, - EVENT_RECORD, - EVENT_WAIT, - FUSION_START, - FUSION_END, - HCCL, - PROFILER_TRACE, - MEMCPY_ASYNC, - STREAM_SWITCH, - STREAM_ACTIVE, - // Insert new task type here - REVSERVED = 23 -}; - -class TaskInfo { - public: - virtual ~TaskInfo() {} - uint32_t stream_id() const { return stream_id_; } - TaskInfoType type() const { return type_; } - - protected: - TaskInfo(uint32_t stream_id, TaskInfoType type) : stream_id_(stream_id), type_(type) {} - - private: - uint32_t stream_id_; - TaskInfoType type_; -}; - -class CceTaskInfo : public TaskInfo { - public: - CceTaskInfo(uint32_t stream_id, const cce::ccOpContext &ctx, const std::string &stub_func, uint32_t block_dim, - const std::vector &args, uint32_t args_size, const std::vector &sm_desc, - const std::vector &flow_table, const std::vector &args_offset, bool is_flowtable) - : TaskInfo(stream_id, TaskInfoType::CCE), - ctx_(ctx), - stub_func_(stub_func), - block_dim_(block_dim), - args_(args), - args_size_(args_size), - sm_desc_(sm_desc), - flow_table_(flow_table), - args_offset_(args_offset), - is_flowtable_(is_flowtable) {} - ~CceTaskInfo() override {} - - cce::ccOpContext cc_context() const { return ctx_; } - std::string stub_func() const { return stub_func_; } - uint32_t block_dim() const { return block_dim_; } - const std::vector &args() const { return args_; } - uint32_t args_size() const { return args_size_; } - const std::vector &sm_desc() const { return sm_desc_; } - const std::vector &flow_table() const { return flow_table_; } - const std::vector &args_offset() const { return args_offset_; } - bool is_flowtable() const { return is_flowtable_; } - - private: - cce::ccOpContext ctx_; - std::string 
stub_func_; - uint32_t block_dim_; - std::vector args_; - uint32_t args_size_; - std::vector sm_desc_; - std::vector flow_table_; - std::vector args_offset_; - bool is_flowtable_; -}; - -class TbeTaskInfo : public TaskInfo { - public: - TbeTaskInfo(uint32_t stream_id, const std::string &stub_func, uint32_t block_dim, const std::vector &args, - uint32_t args_size, const std::vector &sm_desc, void *binary, uint32_t binary_size, - const std::vector &meta_data, const std::vector &input_data_addrs, - const std::vector &output_data_addrs, const std::vector &workspace_addrs) - : TaskInfo(stream_id, TaskInfoType::TBE), - stub_func_(stub_func), - block_dim_(block_dim), - args_(args), - args_size_(args_size), - sm_desc_(sm_desc), - binary_(binary), - binary_size_(binary_size), - meta_data_(meta_data), - input_data_addrs_(input_data_addrs), - output_data_addrs_(output_data_addrs), - workspace_addrs_(workspace_addrs) {} - ~TbeTaskInfo() override {} - - const std::string &stub_func() const { return stub_func_; } - uint32_t block_dim() const { return block_dim_; } - const std::vector &args() const { return args_; } - uint32_t args_size() const { return args_size_; } - const std::vector &sm_desc() const { return sm_desc_; } - void *binary() const { return binary_; } - uint32_t binary_size() const { return binary_size_; } - const std::vector &meta_data() const { return meta_data_; } - const std::vector &input_data_addrs() const { return input_data_addrs_; } - const std::vector &output_data_addrs() const { return output_data_addrs_; } - const std::vector &workspace_addrs() const { return workspace_addrs_; } - - void SetBinary(void *binary, uint32_t binary_size) { - binary_ = binary; - binary_size_ = binary_size; - } - - private: - std::string stub_func_; - uint32_t block_dim_; - std::vector args_; - uint32_t args_size_; - std::vector sm_desc_; - void *binary_; - uint32_t binary_size_; - std::vector meta_data_; - std::vector input_data_addrs_; - std::vector output_data_addrs_; - 
std::vector workspace_addrs_; -}; - -class AicpuTaskInfo : public TaskInfo { - public: - AicpuTaskInfo(uint32_t stream_id, const string &so_name, const std::string &kernel_name, const std::string &node_def, - const std::vector &input_data_addrs, const std::vector &output_data_addrs) - : TaskInfo(stream_id, TaskInfoType::AICPU), - so_name_(so_name), - kernel_name_(kernel_name), - node_def_(node_def), - input_data_addrs_(input_data_addrs), - output_data_addrs_(output_data_addrs) {} - ~AicpuTaskInfo() override {} - - const std::string &so_name() const { return so_name_; } - const std::string &kernel_name() const { return kernel_name_; } - const std::string &node_def() const { return node_def_; } - const std::vector &input_data_addrs() const { return input_data_addrs_; } - const std::vector &output_data_addrs() const { return output_data_addrs_; } - - private: - std::string so_name_; - std::string kernel_name_; - std::string node_def_; - std::vector input_data_addrs_; - std::vector output_data_addrs_; -}; - -class LabelTaskInfo : public TaskInfo { - public: - uint32_t label_id() const { return label_id_; } - - protected: - LabelTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t label_id) - : TaskInfo(stream_id, type), label_id_(label_id) {} - virtual ~LabelTaskInfo() override {} - - uint32_t label_id_; -}; - -class LabelSetTaskInfo : public LabelTaskInfo { - public: - LabelSetTaskInfo(uint32_t stream_id, uint32_t label_id) - : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SET, label_id) {} - ~LabelSetTaskInfo() override {} -}; - -class LabelSwitchTaskInfo : public LabelTaskInfo { - public: - LabelSwitchTaskInfo(uint32_t stream_id, uint32_t label_id) - : LabelTaskInfo(stream_id, TaskInfoType::LABEL_SWITCH, label_id) {} - ~LabelSwitchTaskInfo() override {} -}; - -class LabelGotoTaskInfo : public LabelTaskInfo { - public: - LabelGotoTaskInfo(uint32_t stream_id, uint32_t label_id) - : LabelTaskInfo(stream_id, TaskInfoType::LABEL_GOTO, label_id) {} - 
~LabelGotoTaskInfo() override {} -}; - -class EventTaskInfo : public TaskInfo { - public: - uint32_t event_id() const { return event_id_; } - - protected: - EventTaskInfo(uint32_t stream_id, TaskInfoType type, uint32_t event_id) - : TaskInfo(stream_id, type), event_id_(event_id) {} - virtual ~EventTaskInfo() override {} - - uint32_t event_id_; -}; - -class EventRecordTaskInfo : public EventTaskInfo { - public: - EventRecordTaskInfo(uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(stream_id, TaskInfoType::EVENT_RECORD, event_id) {} - ~EventRecordTaskInfo() override {} -}; - -class EventWaitTaskInfo : public EventTaskInfo { - public: - EventWaitTaskInfo(uint32_t stream_id, uint32_t event_id) - : EventTaskInfo(stream_id, TaskInfoType::EVENT_WAIT, event_id) {} - ~EventWaitTaskInfo() override {} -}; - -class FusionStartTaskInfo : public TaskInfo { - public: - explicit FusionStartTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_START) {} - ~FusionStartTaskInfo() override {} -}; - -class FusionEndTaskInfo : public TaskInfo { - public: - explicit FusionEndTaskInfo(uint32_t stream_id) : TaskInfo(stream_id, TaskInfoType::FUSION_END) {} - ~FusionEndTaskInfo() override {} -}; - -class HcclTaskInfo : public TaskInfo { - public: - HcclTaskInfo(uint32_t stream_id, const std::string hccl_type, void *input_data_addr, void *output_data_addr, - void *workspace_addr, int64_t workspace_size, int64_t hccl_stream_num, - const std::vector &private_def, void *ops_kernel_store, int32_t count, int64_t root_id, - int64_t op_type, int64_t data_type, std::function hcom_bind_model, - std::function hcom_unbind_model, - std::function, void *)> hcom_distribute_task) - : TaskInfo(stream_id, TaskInfoType::HCCL), - hccl_type_(hccl_type), - input_data_addr_(input_data_addr), - output_data_addr_(output_data_addr), - workspace_addr_(workspace_addr), - workspace_size_(workspace_size), - hccl_stream_num_(hccl_stream_num), - private_def_(private_def), - 
ops_kernel_store_(ops_kernel_store), - count_(count), - root_id_(root_id), - op_type_(op_type), - data_type_(data_type), - hcom_bind_model_(hcom_bind_model), - hcom_unbind_model_(hcom_unbind_model), - hcom_distribute_task_(hcom_distribute_task) {} - ~HcclTaskInfo() override {} - - const std::string &hccl_type() const { return hccl_type_; } - void *input_data_addr() const { return input_data_addr_; } - void *output_data_addr() const { return output_data_addr_; } - void *workspace_addr() const { return workspace_addr_; } - int64_t workspace_size() const { return workspace_size_; } - int64_t hccl_stream_num() const { return hccl_stream_num_; } - const std::vector &private_def() const { return private_def_; } - void *ops_kernel_store() const { return ops_kernel_store_; } - int32_t count() const { return count_; } - int64_t root_id() const { return root_id_; } - int64_t op_type() const { return op_type_; } - int64_t data_type() const { return data_type_; } - std::function hcom_bind_model() const { return hcom_bind_model_; } - std::function hcom_unbind_model() const { return hcom_unbind_model_; } - std::function, void *)> hcom_distribute_task() const { - return hcom_distribute_task_; - } - - private: - std::string hccl_type_; - void *input_data_addr_; - void *output_data_addr_; - void *workspace_addr_; - int64_t workspace_size_; - int64_t hccl_stream_num_; - std::vector private_def_; - void *ops_kernel_store_; - int32_t count_; - int64_t root_id_; - int64_t op_type_; - int64_t data_type_; - std::function hcom_bind_model_; - std::function hcom_unbind_model_; - std::function, void *)> hcom_distribute_task_; -}; - -class ProfilerTraceTaskInfo : public TaskInfo { - public: - ProfilerTraceTaskInfo(uint32_t stream_id, uint64_t log_id, bool notify, uint32_t flat) - : TaskInfo(stream_id, TaskInfoType::PROFILER_TRACE), log_id_(log_id), notify_(notify), flat_(flat) {} - ~ProfilerTraceTaskInfo() override {} - - uint64_t log_id() const { return log_id_; } - bool notify() const { 
return notify_; } - uint32_t flat() const { return flat_; } - - private: - uint64_t log_id_; - bool notify_; - uint32_t flat_; -}; - -class MemcpyAsyncTaskInfo : public TaskInfo { - public: - MemcpyAsyncTaskInfo(uint32_t stream_id, void *dst, uint64_t dst_max, void *src, uint64_t count, uint32_t kind) - : TaskInfo(stream_id, TaskInfoType::MEMCPY_ASYNC), - dst_(dst), - dst_max_(dst_max), - src_(src), - count_(count), - kind_(kind) {} - ~MemcpyAsyncTaskInfo() override {} - - void *dst() const { return dst_; } - uint64_t dst_max() const { return dst_max_; } - void *src() const { return src_; } - uint64_t count() const { return count_; } - uint32_t kind() const { return kind_; } - - private: - void *dst_; - uint64_t dst_max_; - void *src_; - uint64_t count_; - int32_t kind_; -}; - -class StreamSwitchTaskInfo : public TaskInfo { - public: - StreamSwitchTaskInfo(uint32_t stream_id, int64_t true_stream_id, void *input_addr, void *value_addr, int64_t cond, - int64_t data_type) - : TaskInfo(stream_id, TaskInfoType::STREAM_SWITCH), - true_stream_id_(true_stream_id), - input_addr_(input_addr), - value_addr_(value_addr), - cond_(cond), - data_type_(data_type) {} - ~StreamSwitchTaskInfo() override {} - - int64_t true_stream_id() const { return true_stream_id_; } - void *input_addr() const { return input_addr_; } - void *value_addr() const { return value_addr_; } - int64_t cond() const { return cond_; } - int64_t data_type() const { return data_type_; } - - private: - int64_t true_stream_id_; - void *input_addr_; - void *value_addr_; - int64_t cond_; - int64_t data_type_; -}; - -class StreamActiveTaskInfo : public TaskInfo { - public: - StreamActiveTaskInfo(uint32_t stream_id, uint32_t active_stream_id) - : TaskInfo(stream_id, TaskInfoType::STREAM_ACTIVE), active_stream_id_(active_stream_id) {} - ~StreamActiveTaskInfo() override {} - - uint32_t active_stream_id() const { return active_stream_id_; } - - private: - uint32_t active_stream_id_; -}; -} // namespace model_runner -} // 
namespace ge - -#endif // INC_FRAMEWORK_GE_RUNTIME_TASK_INFO_H_ diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h index 11d94817..07d78490 100644 --- a/inc/framework/omg/omg.h +++ b/inc/framework/omg/omg.h @@ -23,6 +23,7 @@ #include #include "framework/common/types.h" #include "framework/omg/omg_inner_types.h" +#include "framework/omg/parser/parser_inner_ctx.h" #include "proto/ge_ir.pb.h" #include "proto/om.pb.h" @@ -99,6 +100,11 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const Status GetOutputLeaf(ge::NodePtr node, std::vector> &output_nodes_info, std::vector &output_nodes_name); + +void UpdateOmgCtxWithParserCtx(); + +void UpdateParserCtxWithOmgCtx(); + } // namespace ge namespace domi { diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 118477b1..8e5bc484 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -31,7 +31,7 @@ using domi::DOMI_TENSOR_ND; using domi::DOMI_TENSOR_RESERVED; using domi::domiTensorFormat_t; -using domi::FMK_TYPE_RESERVED; +using domi::FRAMEWORK_RESERVED; using domi::FrameworkType; using std::map; using std::string; @@ -100,7 +100,7 @@ struct OmgContext { std::string ddk_version; // preferential format used by the entire network domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; - domi::FrameworkType type = domi::FMK_TYPE_RESERVED; + domi::FrameworkType type = domi::FRAMEWORK_RESERVED; RunMode run_mode = ONLY_PRE_CHECK; bool train_flag = false; // whether to use FP16 high precision diff --git a/inc/graph/compute_graph.h b/inc/graph/compute_graph.h index c18b7b5b..4f865f12 100644 --- a/inc/graph/compute_graph.h +++ b/inc/graph/compute_graph.h @@ -80,6 +80,7 @@ class ComputeGraph : public std::enable_shared_from_this, public A Vistor GetOutputNodes() const; NodePtr FindNode(const std::string &name) const; + NodePtr FindFirstNodeMatchType(const std::string &name) const; // AddNode with NodePtr NodePtr 
AddNode(NodePtr node); NodePtr AddNode(OpDescPtr op); @@ -235,8 +236,6 @@ class ComputeGraph : public std::enable_shared_from_this, public A std::vector &stack); graphStatus BFSTopologicalSorting(std::vector &node_vec, std::map &map_in_edge_num, std::deque &stack); - graphStatus BFSTopologicalSortingWithGroup(std::vector &node_vec, - std::map &map_in_edge_num, std::deque &stack); graphStatus CollectBreadthOutNode(const NodePtr &node, std::map &map_in_edge_num, std::map &breadth_node_map); graphStatus TopologicalSortingGraph(); diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index 99dd7774..873952e1 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -136,6 +136,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP_CONV_OP; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS; + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME; @@ -176,6 +179,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_LABEL; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS; // to be deleted GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string 
ATTR_TO_BE_DELETED; diff --git a/inc/graph/ge_tensor.h b/inc/graph/ge_tensor.h index a434591e..29a315d6 100644 --- a/inc/graph/ge_tensor.h +++ b/inc/graph/ge_tensor.h @@ -102,6 +102,9 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeTensorDesc : public AttrH Format GetOriginFormat() const; void SetOriginFormat(Format originFormat); + void SetName(const std::string &name); + const std::string GetName() const; + DataType GetDataType() const; void SetDataType(DataType dt); diff --git a/inc/graph/utils/graph_utils.h b/inc/graph/utils/graph_utils.h index 15d25251..6c344435 100644 --- a/inc/graph/utils/graph_utils.h +++ b/inc/graph/utils/graph_utils.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "graph/anchor.h" #include "graph/node.h" #include "graph/compute_graph.h" @@ -111,21 +112,25 @@ enum IOType { kIn, kOut }; struct NodeIndexIO { NodeIndexIO(ge::NodePtr node, uint32_t index, IOType io_type) - : node(std::move(node)), index(index), io_type(io_type) {} + : node_(std::move(node)), index_(index), io_type_(io_type) { + if (node_ != nullptr) { + value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_); + } + } NodeIndexIO(ge::NodePtr node, int index, IOType io_type) - : node(std::move(node)), index(static_cast(index)), io_type(io_type) {} + : node_(std::move(node)), index_(static_cast(index)), io_type_(io_type) { + if (node_ != nullptr) { + value_ = node_->GetName() + (io_type_ == kOut ? "_out_" : "_in_") + std::to_string(index_); + } + } ~NodeIndexIO() {} - NodePtr node = nullptr; - uint32_t index = 0; - IOType io_type = kOut; + NodePtr node_ = nullptr; + uint32_t index_ = 0; + IOType io_type_ = kOut; + std::string value_; - std::string ToString() const { - if ((node == nullptr) || (node->GetOwnerComputeGraph() == nullptr)) { - return ""; - } - return node->GetName() + (io_type == kOut ? 
"_out_" : "_in_") + std::to_string(index); - } + std::string ToString() const { return value_; } }; class GraphUtils { @@ -310,7 +315,7 @@ class GraphUtils { /// @return success: GRAPH_SUCESS /// static graphStatus GetRefMapping(const ComputeGraphPtr &graph, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol); /// @@ -340,7 +345,7 @@ class GraphUtils { /// @return success: GRAPH_SUCESS /// static graphStatus HandleInAnchorMapping(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol); /// @@ -351,7 +356,7 @@ class GraphUtils { /// @return success: GRAPH_SUCESS /// static graphStatus HandleOutAnchorMapping(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol); /// @@ -362,7 +367,7 @@ class GraphUtils { /// @return success: GRAPH_SUCESS /// static graphStatus HandleSubgraphInput(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol); /// @@ -373,7 +378,7 @@ class GraphUtils { /// @return success: GRAPH_SUCESS /// static graphStatus HandleMergeInput(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol); /// @@ -384,7 +389,7 @@ class GraphUtils { /// @return success: GRAPH_SUCESS /// static graphStatus HandleSubgraphOutput(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol); /// @@ -397,7 +402,7 @@ class GraphUtils { /// @return success: GRAPH_SUCESS /// static graphStatus UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol, std::string &symbol); /// @@ -409,7 +414,7 @@ class GraphUtils { /// @return success: GRAPH_SUCESS /// static graphStatus UpdateRefMapping(const NodeIndexIO &cur_node_info, const 
NodeIndexIO &exist_node_info, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol); /// diff --git a/inc/graph/utils/type_utils.h b/inc/graph/utils/type_utils.h index 35b8cf22..aba2bdbf 100644 --- a/inc/graph/utils/type_utils.h +++ b/inc/graph/utils/type_utils.h @@ -25,6 +25,7 @@ #include "graph/types.h" #include "graph/usr_types.h" #include "register/register_types.h" +#include "external/register/register_fmk_types.h" namespace ge { class TypeUtils { @@ -39,6 +40,7 @@ class TypeUtils { static Format SerialStringToFormat(const std::string &str); static Format DataFormatToFormat(const std::string &str); static Format DomiFormatToFormat(domi::domiTensorFormat_t domi_format); + static std::string FmkTypeToSerialString(domi::FrameworkType fmk_type); static graphStatus Usr2DefQuantizeFactorParams(const UsrQuantizeFactorParams &usr, QuantizeFactorParams &def); static graphStatus Def2UsrQuantizeFactorParams(const QuantizeFactorParams &def, UsrQuantizeFactorParams &usr); diff --git a/src/common/graph/compute_graph.cc b/src/common/graph/compute_graph.cc index 0729e64e..b73cf939 100644 --- a/src/common/graph/compute_graph.cc +++ b/src/common/graph/compute_graph.cc @@ -48,63 +48,6 @@ bool IsUseBFS() { } return false; } -bool IsTailingOptimization() { - string is_tailing_optimization_option; - auto ret = GetContext().GetOption(ge::OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, is_tailing_optimization_option); - if (ret == GRAPH_SUCCESS) { - GELOGI("Option ge.exec.isTailingOptimization is %s", is_tailing_optimization_option.c_str()); - // "1" means it's True from frontend option - return is_tailing_optimization_option == "1"; - } - GELOGW("OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION not set, use BFSTopologicalSorting by default."); - return false; -} -bool IsFusedNode(const NodePtr &node) { - bool is_fused_node = false; - AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_FLAG, is_fused_node); - return is_fused_node; -} -string 
GetGroupId(const NodePtr &node) { - string group_id; - AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id); - return group_id; -} -bool IsGroupEnd(const NodePtr &node) { - if (GetGroupId(node).empty()) { - return false; - } - if (node->GetOutDataNodesSize() == 0) { - return true; - } - for (const auto &out_data_node : node->GetOutDataNodes()) { - if (IsFusedNode(out_data_node)) { - return true; - } - } - return false; -} -void SplitNodeToStack(const std::map &breadth_node_map, string current_group_id, - std::vector &stack_input, std::deque &group_stack, std::deque &stack) { - for (const auto &name_node : breadth_node_map) { - // group first - string group_id; - if (AttrUtils::GetStr(name_node.second->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id)) { - GELOGI("current node %s, group id: %s , current group id %s", name_node.second->GetName().c_str(), - group_id.c_str(), current_group_id.c_str()); - if (!current_group_id.empty() && group_id != current_group_id) { - GELOGI("node go to input_stack back: %s", name_node.second->GetName().c_str()); - (void)stack_input.insert(stack_input.begin(), name_node.second); - } else { - current_group_id = group_id; - GELOGI("node go to group_stack: %s", name_node.second->GetName().c_str()); - (void)group_stack.push_front(name_node.second); - } - continue; - } - GELOGI("node go to stack: %s ", name_node.second->GetName().c_str()); - (void)stack.push_front(name_node.second); - } -} } // namespace GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraph::ComputeGraph(const std::string &name) @@ -193,6 +136,19 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr ComputeGraph::FindNode(co return nullptr; } +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr +ComputeGraph::FindFirstNodeMatchType(const std::string &name) const { + for (const auto &node : nodes_) { + if (node == nullptr) { + continue; + } + if (node->GetType() == name) { + return node; + } + } + return nullptr; +} + 
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ComputeGraph::GraphAttrsAreEqual( const ComputeGraph &r_graph) const { // ProtoMsgOwner <::google::protobuf::Message> is temporarily ignored @@ -642,9 +598,9 @@ ComputeGraph::UpdateInputMapping(const std::map &input_mappi /// GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::UpdateOutputMapping(const std::map &output_mapping) { - NodePtr net_output = FindNode(NODE_NAME_NET_OUTPUT); + NodePtr net_output = FindFirstNodeMatchType(NETOUTPUT); if (net_output == nullptr) { - GE_LOGE("UpdateOutputMapping failed: node %s not exist in graph.", NODE_NAME_NET_OUTPUT); + GE_LOGE("UpdateOutputMapping failed: node type %s not exist in graph.", NETOUTPUT); return GRAPH_FAILED; } OpDescPtr op_desc = net_output->GetOpDesc(); @@ -799,65 +755,6 @@ graphStatus ComputeGraph::BFSTopologicalSorting(std::vector &node_vec, return GRAPH_SUCCESS; } -graphStatus ComputeGraph::BFSTopologicalSortingWithGroup(std::vector &node_vec, - std::map &map_in_edge_num, - std::deque &stack) { - GELOGI("Runing_Bfs_Sort_With_Group"); - std::string current_group_id; - std::vector stack_input; - std::deque group_stack; - std::deque fused_node_stack; - std::map breadth_node_map; - // Record the number of non data nodes but no input nodes - GE_CHK_BOOL_EXEC(SortNodes(stack_input, map_in_edge_num) == GRAPH_SUCCESS, return GRAPH_FAILED, "sort nodes failed"); - - // Only data nodes here - while (!stack_input.empty() || !stack.empty() || !group_stack.empty()) { - NodePtr node = nullptr; - if (!group_stack.empty()) { - // Traversal node in group has priority - node = group_stack.back(); - group_stack.pop_back(); - } else if (!stack.empty()) { - node = stack.back(); - stack.pop_back(); - } else { - node = stack_input.back(); - stack_input.pop_back(); - } - - if (IsFusedNode(node) && current_group_id.empty()) { - current_group_id = node->GetName(); - } - if (GetGroupId(node).empty() || GetGroupId(node) == current_group_id) { - 
node_vec.push_back(node); - GE_CHECK_NOTNULL(node->GetOpDesc()); - GELOGI("node_vec.push_back %s", node->GetOpDesc()->GetName().c_str()); - } else { - if (current_group_id.empty()) { - current_group_id = GetGroupId(node); - node_vec.push_back(node); - GE_CHECK_NOTNULL(node->GetOpDesc()); - GELOGI("node_vec.push_back %s", node->GetOpDesc()->GetName().c_str()); - } else { - GELOGI("current group id is %s ,node go to input_stack back: %s", current_group_id.c_str(), - node->GetName().c_str()); - (void)stack_input.insert(stack_input.begin(), node); - continue; - } - } - CollectBreadthOutNode(node, map_in_edge_num, breadth_node_map); - SplitNodeToStack(breadth_node_map, current_group_id, stack_input, group_stack, stack); - breadth_node_map.clear(); - // check the end of group - if (IsGroupEnd(node)) { - GELOGI("Current node %s is end of group %s.", node->GetName().c_str(), current_group_id.c_str()); - current_group_id = ""; - } - } - return GRAPH_SUCCESS; -} - graphStatus ComputeGraph::CollectBreadthOutNode(const NodePtr &node, std::map &map_in_edge_num, std::map &breadth_node_map) { for (const auto &anchor : node->GetAllOutDataAnchors()) { @@ -907,7 +804,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::Topolog } std::vector> subgraphs; - (void)AllGraphNodes(subgraphs); + auto nodes = AllGraphNodes(subgraphs); + for (size_t i = 0; i < nodes.size(); i++) { + NodePtr node = nodes.at(i); // [node: should not be null] + node->GetOpDesc()->SetId(i); // [node->GetOpDesc(): should not be null] + } if (sub_graph_.size() != subgraphs.size()) { // Graph Partition use subgraph, Keep original GELOGW("Keep original subgraph for graph size %zu not equal %zu.", sub_graph_.size(), subgraphs.size()); return SUCCESS; @@ -920,17 +821,10 @@ graphStatus ComputeGraph::TopologicalSortingGraph() { std::vector node_vec; std::map map_in_edge_num; bool use_BFS = IsUseBFS(); - bool is_tailing_optimization = IsTailingOptimization(); if (use_BFS) { std::deque stack; - if 
(is_tailing_optimization) { - if (BFSTopologicalSortingWithGroup(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) { - return GRAPH_FAILED; - } - } else { - if (BFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) { - return GRAPH_FAILED; - } + if (BFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) { + return GRAPH_FAILED; } } else { std::vector stack; diff --git a/src/common/graph/format_refiner.cc b/src/common/graph/format_refiner.cc index 91d388d0..11a610ce 100644 --- a/src/common/graph/format_refiner.cc +++ b/src/common/graph/format_refiner.cc @@ -41,7 +41,7 @@ using namespace ge; using namespace std; namespace ge { namespace { -static const std::unordered_set kChangeDimNodes = {RESHAPE, PERMUTE, EXPANDDIMS, SQUEEZE}; +static const std::unordered_set kChangeDimNodes = {PERMUTE, EXPANDDIMS, SQUEEZE}; static bool net_format_is_nd = true; static Format g_user_set_format = FORMAT_ND; static bool is_first_infer = true; diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 1f427cf3..1c2c9c71 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -118,6 +118,9 @@ const std::string ATTR_NAME_NAN_OPT = "nan_opt"; const std::string ATTR_NAME_AIPP = "aipp"; const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp"; +const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs"; +const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs"; + const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name"; @@ -150,6 +153,7 @@ const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG = "need_stream_cycle_event"; const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id"; const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; +const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = 
"_dynamic_output_dims"; // To be deleted const std::string ATTR_TO_BE_DELETED = "to_be_deleted"; @@ -1000,7 +1004,7 @@ const std::string ATTR_NAME_FUSION_TYPE_LIST = "_fusion_type_list"; const std::string ATTR_NAME_VALID_INPUT_SHAPE_LIST_LIST = "_valid_input_shape_list_list"; const std::string ATTR_NAME_VALID_OUTPUT_SHAPE_LIST_LIST = "_valid_output_shape_list_list"; const std::string ATTR_NAME_SLICE_INPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; -const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_input_offset_list_list"; +const std::string ATTR_NAME_SLICE_OUTPUT_OFFSET_LIST_LIST = "_output_offset_list_list"; // used for Horovod const std::string ATTR_INTER_EVENT_IDENTIFY = "event_id"; diff --git a/src/common/graph/ge_attr_value.cc b/src/common/graph/ge_attr_value.cc index 004d0227..3a1dec6d 100644 --- a/src/common/graph/ge_attr_value.cc +++ b/src/common/graph/ge_attr_value.cc @@ -1233,6 +1233,11 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr AttrUtils::CloneOpDesc( GELOGE(GRAPH_FAILED, "DelAttr _opt_input failed."); } } + + if (!op_desc->output_name_idx_.empty()) { + op_desc->output_name_idx_.clear(); + } + return op_desc; } diff --git a/src/common/graph/ge_tensor.cc b/src/common/graph/ge_tensor.cc index 5d7b6a2e..8ffbba91 100644 --- a/src/common/graph/ge_tensor.cc +++ b/src/common/graph/ge_tensor.cc @@ -464,6 +464,24 @@ void GeTensorDesc::SetFormat(Format format) { } } +void GeTensorDesc::SetName(const std::string &name) { + auto tensor_descriptor_msg = tensor_descriptor_.GetProtoMsg(); + if (tensor_descriptor_msg != nullptr) { + tensor_descriptor_msg->set_name(name); + return; + } + GELOGW("[SetName]tensor_descriptor_msg is null."); +} + +const std::string GeTensorDesc::GetName() const { + auto tensor_descriptor_msg = tensor_descriptor_.GetProtoMsg(); + if (tensor_descriptor_msg != nullptr) { + return tensor_descriptor_msg->name(); + } + GELOGW("[GetName]tensor_descriptor_msg is null."); + return ""; +} + Format 
GeTensorDesc::GetOriginFormat() const { std::string origin_format_str; if (!AttrUtils::GetStr(this, TENSOR_UTILS_ORIGIN_FORMAT, origin_format_str)) { diff --git a/src/common/graph/graph.mk b/src/common/graph/graph.mk new file mode 100644 index 00000000..744d1725 --- /dev/null +++ b/src/common/graph/graph.mk @@ -0,0 +1,182 @@ +LOCAL_PATH := $(call my-dir) + +COMMON_LOCAL_SRC_FILES := \ + ./proto/om.proto \ + ./proto/ge_ir.proto \ + ./proto/ge_onnx.proto \ + ./proto/insert_op.proto \ + ./proto/task.proto \ + ./proto/fwk_adapter.proto \ + ./proto/op_mapping_info.proto \ + ./anchor.cc \ + ./ge_attr_value.cc \ + ./attr_value.cc \ + ./buffer.cc \ + ./compute_graph.cc \ + ./graph.cc \ + ./inference_context.cc \ + ./shape_refiner.cc \ + ./format_refiner.cc \ + ./ref_relation.cc \ + ./model.cc \ + ./model_serialize.cc \ + ./node.cc \ + ./op_desc.cc \ + ./operator.cc \ + ./operator_factory.cc \ + ./operator_factory_impl.cc \ + ./ge_attr_define.cc \ + ./ge_tensor.cc \ + ./detail/attributes_holder.cc \ + ./utils/anchor_utils.cc \ + ./utils/graph_utils.cc \ + ./utils/ge_ir_utils.cc \ + ./utils/node_utils.cc \ + ./utils/op_desc_utils.cc \ + ./utils/type_utils.cc \ + ./utils/tensor_utils.cc \ + ./tensor.cc \ + ./debug/graph_debug.cc \ + ./opsproto/opsproto_manager.cc \ + ../ops/op_imp.cpp \ + option/ge_context.cc \ + option/ge_local_context.cc \ + ./runtime_inference_context.cc \ + +COMMON_LOCAL_C_INCLUDES := \ + proto/om.proto \ + proto/ge_ir.proto \ + proto_inner/ge_onnx.proto \ + proto/insert_op.proto \ + proto/task.proto \ + proto/fwk_adapter.proto \ + proto/op_mapping_info.proto \ + inc \ + inc/external \ + inc/external/graph \ + inc/graph \ + inc/common \ + common \ + common/graph \ + third_party/protobuf/include \ + libc_sec/include \ + ops/built-in/op_proto/inc \ + + +#compiler for host +include $(CLEAR_VARS) +LOCAL_MODULE := libgraph + +LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 +LOCAL_CPPFLAGS += -fexceptions + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) 
+LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_MULTILIB := 64 +LOCAL_PROPRIETARY_MODULE := true + +include $(BUILD_HOST_SHARED_LIBRARY) + + +#compiler for device +include $(CLEAR_VARS) +LOCAL_MODULE := libgraph + +LOCAL_CFLAGS += -O2 + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + +LOCAL_LDFLAGS := -lrt -ldl + +ifeq ($(device_os),android) +LOCAL_LDFLAGS := -ldl +endif + +LOCAL_MULTILIB := 64 +LOCAL_PROPRIETARY_MODULE := true + +include $(BUILD_SHARED_LIBRARY) + + +# compile for ut/st +include $(CLEAR_VARS) +LOCAL_MODULE := libgraph + +LOCAL_CFLAGS += + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_MULTILIB := 64 +LOCAL_PROPRIETARY_MODULE := true + +include $(BUILD_LLT_SHARED_LIBRARY) + + +#compiler for host static lib +include $(CLEAR_VARS) +LOCAL_MODULE := libgraph + +LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 +LOCAL_CPPFLAGS += -fexceptions + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := \ + libprotobuf \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libslog \ + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_MULTILIB := 64 +LOCAL_PROPRIETARY_MODULE := true + +include $(BUILD_HOST_STATIC_LIBRARY) + +#compiler for device static lib +include $(CLEAR_VARS) +LOCAL_MODULE := libgraph + +LOCAL_CFLAGS += -O2 + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := \ + libprotobuf \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libslog \ + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_MULTILIB := 64 +LOCAL_PROPRIETARY_MODULE := true + +include $(BUILD_STATIC_LIBRARY) 
diff --git a/src/common/graph/model_serialize.cc b/src/common/graph/model_serialize.cc index a3b7a936..19cb4538 100644 --- a/src/common/graph/model_serialize.cc +++ b/src/common/graph/model_serialize.cc @@ -130,6 +130,16 @@ bool ModelSerializeImp::SerializeOpDesc(const ConstOpDescPtr &op_desc, proto::Op for (const std::string &name : op_desc->GetSubgraphInstanceNames()) { op_def_proto->add_subgraph_name(name); } + + proto::AttrDef key; + proto::AttrDef value; + for (auto &item : op_desc->output_name_idx_) { + key.mutable_list()->add_s(item.first); + value.mutable_list()->add_i(item.second); + } + auto op_desc_attr = op_def_proto->mutable_attr(); + op_desc_attr->insert({"_output_name_key", key}); + op_desc_attr->insert({"_output_name_value", value}); } return true; } @@ -228,6 +238,25 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY bool ModelSerializeImp::Unseriali } bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_def_proto) { + std::vector key; + std::vector value; + if (op_def_proto.attr().count("_output_name_key") > 0) { + auto &output_name_key_list = op_def_proto.attr().at("_output_name_key").list(); + for (const auto &item_s : output_name_key_list.s()) { + key.push_back(item_s); + } + auto op_desc_attr = op_def_proto.mutable_attr(); + op_desc_attr->erase("_output_name_key"); + } + if (op_def_proto.attr().count("_output_name_value") > 0) { + auto &output_name_value_list = op_def_proto.attr().at("_output_name_value").list(); + for (const auto &item_i : output_name_value_list.i()) { + value.push_back(static_cast(item_i)); + } + auto op_desc_attr = op_def_proto.mutable_attr(); + op_desc_attr->erase("_output_name_value"); + } + op_desc = std::shared_ptr(new (std::nothrow) OpDesc(protobuf_owner_, &op_def_proto)); GE_CHK_BOOL_EXEC(op_desc != nullptr, return false, "op_desc is nullptr."); @@ -253,6 +282,16 @@ bool ModelSerializeImp::UnserializeOpDesc(OpDescPtr &op_desc, proto::OpDef &op_d op_desc->SetSubgraphInstanceName(graph_index++, 
name); } + if (key.size() != 0) { + if (key.size() != value.size()) { + GELOGE(GRAPH_FAILED, "twe vector size is different. key_size: %zu, value_size: %zu.", key.size(), value.size()); + } else { + for (uint32_t i = 0; i < key.size(); ++i) { + op_desc->output_name_idx_.insert(std::pair(key.at(i), value.at(i))); + } + } + } + return true; } diff --git a/src/common/graph/module.mk b/src/common/graph/module.mk new file mode 100644 index 00000000..1e00b7fc --- /dev/null +++ b/src/common/graph/module.mk @@ -0,0 +1,3 @@ +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/graph.mk diff --git a/src/common/graph/tensor.cc b/src/common/graph/tensor.cc index d5d304b7..0d511645 100644 --- a/src/common/graph/tensor.cc +++ b/src/common/graph/tensor.cc @@ -589,6 +589,7 @@ GeTensorDesc TensorAdapter::TensorDesc2GeTensorDesc(const TensorDesc &tensor_des tensor_desc.GetDataType()); ge_tensor_desc.SetOriginShape(GeShape(tensor_desc.GetOriginShape().GetDims())); ge_tensor_desc.SetOriginFormat(tensor_desc.GetOriginFormat()); + ge_tensor_desc.SetName(tensor_desc.GetName()); std::vector> shape_range; auto status = tensor_desc.GetShapeRange(shape_range); if (status != GRAPH_SUCCESS) { @@ -613,6 +614,7 @@ TensorDesc TensorAdapter::GeTensorDesc2TensorDesc(const GeTensorDesc &ge_tensor_ ge_tensor_desc.GetDataType()); tensor_desc.SetOriginShape(Shape(ge_tensor_desc.GetOriginShape().GetDims())); tensor_desc.SetOriginFormat(ge_tensor_desc.GetOriginFormat()); + tensor_desc.SetName(ge_tensor_desc.GetName()); std::vector> shape_range; auto status = ge_tensor_desc.GetShapeRange(shape_range); if (status != GRAPH_SUCCESS) { diff --git a/src/common/graph/utils/graph_utils.cc b/src/common/graph/utils/graph_utils.cc index c4057c95..ca2ebcdc 100644 --- a/src/common/graph/utils/graph_utils.cc +++ b/src/common/graph/utils/graph_utils.cc @@ -1336,7 +1336,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY ComputeGraphPtr GraphUtils::FindR /// @return success: GRAPH_SUCESS /// graphStatus 
GraphUtils::GetRefMapping(const ComputeGraphPtr &graph, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol) { GE_CHECK_NOTNULL(graph); for (auto &node : graph->GetAllNodes()) { @@ -1384,7 +1384,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY NodePtr GraphUtils::FindNodeFromA /// @return success: GRAPH_SUCESS /// graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol) { GE_CHECK_NOTNULL(node); @@ -1402,7 +1402,7 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, } for (auto &in_data_anchor : node->GetAllInDataAnchors()) { - NodeIndexIO cur_node_info = NodeIndexIO(node, in_data_anchor->GetIdx(), kIn); + NodeIndexIO cur_node_info(node, in_data_anchor->GetIdx(), kIn); OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); if (peer_out_anchor == nullptr) { std::string symbol = cur_node_info.ToString(); @@ -1410,7 +1410,7 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, symbol_to_anchors[symbol] = {cur_node_info}; anchor_to_symbol[symbol] = symbol; } else { - NodeIndexIO exist_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); + NodeIndexIO exist_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { GE_LOGE("Update symbol mapping failed."); return GRAPH_FAILED; @@ -1429,18 +1429,18 @@ graphStatus GraphUtils::HandleInAnchorMapping(const NodePtr &node, /// @return success: GRAPH_SUCESS /// graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol) { GE_CHECK_NOTNULL(node); for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - NodeIndexIO cur_node_info = NodeIndexIO(node, 
out_data_anchor->GetIdx(), kOut); + NodeIndexIO cur_node_info(node, out_data_anchor->GetIdx(), kOut); if (anchor_to_symbol.find(cur_node_info.ToString()) != anchor_to_symbol.end()) { continue; } int32_t reuse_in_index = -1; if (IsRefFromInput(out_data_anchor, reuse_in_index)) { - NodeIndexIO exist_node_info = NodeIndexIO(node, reuse_in_index, kIn); + NodeIndexIO exist_node_info(node, reuse_in_index, kIn); if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) { GE_LOGE("Update symbol mapping failed."); return GRAPH_FAILED; @@ -1448,7 +1448,7 @@ graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node, } else { std::string symbol = cur_node_info.ToString(); GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str()); - symbol_to_anchors.emplace(std::make_pair(symbol, std::vector{cur_node_info})); + symbol_to_anchors.emplace(std::make_pair(symbol, std::list{cur_node_info})); anchor_to_symbol.emplace(std::make_pair(symbol, symbol)); } } @@ -1464,7 +1464,7 @@ graphStatus GraphUtils::HandleOutAnchorMapping(const NodePtr &node, /// @return success: GRAPH_SUCESS /// graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol) { GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); @@ -1482,8 +1482,8 @@ graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node, OutDataAnchorPtr peer_out_anchor = parent_in_anchor->GetPeerOutAnchor(); if (peer_out_anchor != nullptr) { // Data has and only has one input - NodeIndexIO cur_node_info = NodeIndexIO(node, 0, kIn); - NodeIndexIO exist_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); + NodeIndexIO cur_node_info(node, 0, kIn); + NodeIndexIO exist_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); if (UpdateRefMapping(cur_node_info, exist_node_info, symbol_to_anchors, 
anchor_to_symbol) != GRAPH_SUCCESS) { GE_LOGE("Update symbol mapping failed."); return GRAPH_FAILED; @@ -1501,7 +1501,7 @@ graphStatus GraphUtils::HandleSubgraphInput(const NodePtr &node, /// @return success: GRAPH_SUCESS /// graphStatus GraphUtils::HandleMergeInput(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol) { GE_CHECK_NOTNULL(node); std::vector exist_node_infos; @@ -1574,7 +1574,7 @@ graphStatus GraphUtils::HandleMergeInput(const NodePtr &node, /// @return success: GRAPH_SUCESS /// graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol) { GE_CHECK_NOTNULL(node); ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); @@ -1595,8 +1595,8 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node, } GE_CHECK_NOTNULL(parent_node->GetOutDataAnchor(index)); // Union symbol of peer_out_anchor & parent_out_anchor - NodeIndexIO peer_node_info = NodeIndexIO(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); - NodeIndexIO parent_node_info = NodeIndexIO(parent_node, index, kOut); + NodeIndexIO peer_node_info(peer_out_anchor->GetOwnerNode(), peer_out_anchor->GetIdx(), kOut); + NodeIndexIO parent_node_info(parent_node, index, kOut); std::string symbol; if ((UnionSymbolMapping(peer_node_info, parent_node_info, symbol_to_anchors, anchor_to_symbol, symbol) != GRAPH_SUCCESS) || @@ -1606,7 +1606,7 @@ graphStatus GraphUtils::HandleSubgraphOutput(const NodePtr &node, return GRAPH_FAILED; } - NodeIndexIO cur_node_info = NodeIndexIO(node, in_data_anchor->GetIdx(), kIn); + NodeIndexIO cur_node_info(node, in_data_anchor->GetIdx(), kIn); GELOGD("Add anchor %s, symbol %s.", cur_node_info.ToString().c_str(), symbol.c_str()); symbol_to_anchors[symbol].emplace_back(cur_node_info); anchor_to_symbol.emplace(std::make_pair(cur_node_info.ToString(), symbol)); @@ -1625,7 +1625,7 @@ graphStatus 
GraphUtils::HandleSubgraphOutput(const NodePtr &node, /// @return success: GRAPH_SUCESS /// graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1, const NodeIndexIO &exist_node_info2, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol, std::string &symbol) { std::string symbol1 = anchor_to_symbol[exist_node_info1.ToString()]; std::string symbol2 = anchor_to_symbol[exist_node_info2.ToString()]; @@ -1675,7 +1675,7 @@ graphStatus GraphUtils::UnionSymbolMapping(const NodeIndexIO &exist_node_info1, /// @return success: GRAPH_SUCESS /// graphStatus GraphUtils::UpdateRefMapping(const NodeIndexIO &cur_node_info, const NodeIndexIO &exist_node_info, - std::map> &symbol_to_anchors, + std::map> &symbol_to_anchors, std::map &anchor_to_symbol) { auto iter1 = anchor_to_symbol.find(exist_node_info.ToString()); if (iter1 == anchor_to_symbol.end()) { diff --git a/src/common/graph/utils/op_desc_utils.cc b/src/common/graph/utils/op_desc_utils.cc index 886a2952..6264ddb9 100644 --- a/src/common/graph/utils/op_desc_utils.cc +++ b/src/common/graph/utils/op_desc_utils.cc @@ -524,7 +524,6 @@ OpDescPtr OpDescUtils::CreateConstOp(const GeTensorPtr &tensor_ptr) { return nullptr; } - GE_CHK_BOOL_EXEC(const_opdesc != nullptr, return nullptr, "const_opdesc is nullptr!"); CHECK_FALSE_EXEC(SetWeights(const_opdesc, tensor_ptr) == ge::GRAPH_SUCCESS, return nullptr); const_opdesc->SetType(CONSTANT); diff --git a/src/common/graph/utils/tensor_utils.cc b/src/common/graph/utils/tensor_utils.cc index 072673c0..674cab55 100644 --- a/src/common/graph/utils/tensor_utils.cc +++ b/src/common/graph/utils/tensor_utils.cc @@ -273,6 +273,7 @@ static graphStatus CalcTensorElementCnt(const std::vector &dims, Format case FORMAT_FRACTAL_Z: graph_status = CalcElementCntOfFractalZ(dims, data_type, element_cnt); break; + case FORMAT_NC1HWC0_C04: case FORMAT_FRACTAL_NZ: case FORMAT_FRACTAL_ZZ: case FORMAT_NDHWC: @@ -283,6 +284,7 @@ static graphStatus 
CalcTensorElementCnt(const std::vector &dims, Format case FORMAT_FRACTAL_Z_3D_TRANSPOSE: case FORMAT_NDC1HWC0: case FORMAT_FRACTAL_Z_C04: + case FORMAT_FRACTAL_ZN_LSTM: graph_status = CalcElementCntByDims(dims, element_cnt); break; default: diff --git a/src/common/graph/utils/type_utils.cc b/src/common/graph/utils/type_utils.cc index 7d78db5f..e4986931 100644 --- a/src/common/graph/utils/type_utils.cc +++ b/src/common/graph/utils/type_utils.cc @@ -59,6 +59,7 @@ static const std::map kFormatToStringMap = { {FORMAT_CN, "CN"}, {FORMAT_NC, "NC"}, {FORMAT_FRACTAL_ZN_LSTM, "FRACTAL_ZN_LSTM"}, + {FORMAT_FRACTAL_Z_G, "FRACTAL_Z_G"}, {FORMAT_RESERVED, "FORMAT_RESERVED"}, {FORMAT_ALL, "ALL"}}; @@ -98,8 +99,9 @@ static const std::unordered_set kInternalFormat = {"NC1HWC0", "FRACTAL_NZ", "NDC1HWC0", "FORMAT_FRACTAL_Z_3D", - "FORMAT_FRACTAL_Z_3D_TRANSPOSE" - "FORMAT_FRACTAL_ZN_LSTM"}; + "FORMAT_FRACTAL_Z_3D_TRANSPOSE", + "FORMAT_FRACTAL_ZN_LSTM", + "FORMAT_FRACTAL_Z_G"}; static const std::map kDataFormatMap = { {"NCHW", FORMAT_NCHW}, {"NHWC", FORMAT_NHWC}, {"NDHWC", FORMAT_NDHWC}, {"NCDHW", FORMAT_NCDHW}, {"ND", FORMAT_ND}}; @@ -143,6 +145,7 @@ static const std::map kStringToFormatMap = { {"CN", FORMAT_CN}, {"NC", FORMAT_NC}, {"FRACTAL_ZN_LSTM", FORMAT_FRACTAL_ZN_LSTM}, + {"FRACTAL_Z_G", FORMAT_FRACTAL_Z_G}, {"FORMAT_RESERVED", FORMAT_RESERVED}, {"ALL", FORMAT_ALL}}; @@ -235,6 +238,11 @@ static const std::map kDataTypeToLength = { {DT_RESOURCE, sizeof(uint64_t)}, }; +static const std::map kFmkTypeToString = { + {domi::CAFFE, "caffe"}, {domi::MINDSPORE, "mindspore"}, {domi::TENSORFLOW, "tensorflow"}, + {domi::ANDROID_NN, "android_nn"}, {domi::ONNX, "onnx"}, {domi::FRAMEWORK_RESERVED, "framework_reserved"}, +}; + bool TypeUtils::IsDataTypeValid(DataType dt) { uint32_t num = static_cast(dt); GE_CHK_BOOL_EXEC((num <= DT_UNDEFINED), return false, "The DataType is invalid"); @@ -312,6 +320,16 @@ Format TypeUtils::DomiFormatToFormat(domi::domiTensorFormat_t domi_format) { return 
FORMAT_RESERVED; } +std::string TypeUtils::FmkTypeToSerialString(domi::FrameworkType fmk_type) { + auto it = kFmkTypeToString.find(fmk_type); + if (it != kFmkTypeToString.end()) { + return it->second; + } else { + GELOGW("Framework type not support %d.", fmk_type); + return ""; + } +} + static inline void CopyDataFromBuffer(vector &data, const Buffer &buffer) { data.clear(); if (buffer.GetData() != nullptr && buffer.GetSize() != 0) { diff --git a/src/ge/CMakeLists.txt b/src/ge/CMakeLists.txt index 97d349c0..894eaf1e 100755 --- a/src/ge/CMakeLists.txt +++ b/src/ge/CMakeLists.txt @@ -45,7 +45,7 @@ include_directories(${GE_SOURCE_DIR}/inc/external) include_directories(${GE_SOURCE_DIR}/inc/external/graph) include_directories(${GE_SOURCE_DIR}/inc/framework) include_directories(${GE_SOURCE_DIR}/inc/framework/common) -include_directories(${GE_SOURCE_DIR}/inc/runtime) +include_directories(${GE_SOURCE_DIR}/inc/graph) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) @@ -108,6 +108,10 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "graph/partition/engine_place.cc" "graph/partition/graph_partition.cc" "graph/passes/*.cc" + "graph/preprocess/graph_preprocess.cc" + "graph/preprocess/insert_op/ge_aipp_op.cc" + "graph/preprocess/insert_op/util_insert_aipp_op.cc" + "graph/preprocess/multi_batch_copy_graph.cc" "host_kernels/add_kernel.cc" "host_kernels/broadcast_args_kernel.cc" "host_kernels/broadcast_gradient_args_kernel.cc" @@ -144,10 +148,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "host_kernels/transdata_kernel.cc" "host_kernels/transpose_kernel.cc" "host_kernels/unpack_kernel.cc" - "graph/preprocess/graph_preprocess.cc" - "graph/preprocess/insert_op/ge_aipp_op.cc" - "graph/preprocess/insert_op/util_insert_aipp_op.cc" - "graph/preprocess/multi_batch_copy_graph.cc" + "host_kernels/unsqueeze_kernel.cc" 
"hybrid/common/npu_memory_allocator.cc" "hybrid/common/tensor_value.cc" "hybrid/executor/*.cc" @@ -155,6 +156,7 @@ file(GLOB TRAIN_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "hybrid/hybrid_davinci_model.cc" "hybrid/model/*.cc" "hybrid/node_executor/aicore/*.cc" + "hybrid/node_executor/aicpu/aicpu_ext_info.cc" "hybrid/node_executor/aicpu/aicpu_node_executor.cc" "hybrid/node_executor/compiledsubgraph/known_node_executor.cc" "hybrid/node_executor/hostcpu/ge_local_node_executor.cc" @@ -246,6 +248,10 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "graph/partition/engine_place.cc" "graph/partition/graph_partition.cc" "graph/passes/*.cc" + "graph/preprocess/graph_preprocess.cc" + "graph/preprocess/insert_op/ge_aipp_op.cc" + "graph/preprocess/insert_op/util_insert_aipp_op.cc" + "graph/preprocess/multi_batch_copy_graph.cc" "host_kernels/add_kernel.cc" "host_kernels/broadcast_args_kernel.cc" "host_kernels/broadcast_gradient_args_kernel.cc" @@ -282,11 +288,8 @@ file(GLOB INFER_SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "host_kernels/transdata_kernel.cc" "host_kernels/transpose_kernel.cc" "host_kernels/unpack_kernel.cc" + "host_kernels/unsqueeze_kernel.cc" "hybrid/hybrid_davinci_model_stub.cc" - "graph/preprocess/graph_preprocess.cc" - "graph/preprocess/insert_op/ge_aipp_op.cc" - "graph/preprocess/insert_op/util_insert_aipp_op.cc" - "graph/preprocess/multi_batch_copy_graph.cc" "init/gelib.cc" "ir_build/atc_ir_common.cc" "ir_build/ge_ir_build.cc" diff --git a/src/ge/client/ge_api.cc b/src/ge/client/ge_api.cc index 51a0accd..ae6a9892 100644 --- a/src/ge/client/ge_api.cc +++ b/src/ge/client/ge_api.cc @@ -29,6 +29,7 @@ #include "graph/utils/type_utils.h" #include "graph/manager/util/rt_context_util.h" #include "register/op_registry.h" +#include "common/ge/tbe_plugin_manager.h" using domi::GetContext; using domi::OpRegistry; @@ -132,6 +133,9 @@ Status GEInitialize(const std::map &options) { } GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid"); 
+ GE_TIMESTAMP_START(InitPreparation); + TBEPluginManager::Instance().InitPreparation(options); + GE_TIMESTAMP_END(InitPreparation, "GEInitialize::InitPreparation"); // call Initialize GELOGT(TRACE_RUNNING, "Initializing environment"); GE_TIMESTAMP_START(GELibInitialize); @@ -178,6 +182,10 @@ Status GEFinalize() { ret = middle_ret; } } + middle_ret = TBEPluginManager::Instance().Finalize(); + if (middle_ret != SUCCESS) { + ret = middle_ret; + } if (kGeInitialized && ret == SUCCESS) { // Unified destruct rt_context @@ -262,10 +270,10 @@ Status Session::AddGraph(uint32_t graph_id, const Graph &graph) { } Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map &options) { - GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, sessinon_id: %lu.", graph_id, sessionId_); + GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_); std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Sesson."); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session."); return FAILED; } GELOGD("Adding graph to session"); @@ -340,7 +348,7 @@ void PrintOutputResult(std::vector &outputs) { GELOGI("output data[%zu]=%lf", i, *(reinterpret_cast(outputs[0].GetData()) + i)); break; default: - GELOGI("Output datatype %s is not support print.", TypeUtils::DataTypeToSerialString(data_type).c_str()); + GELOGI("Output datatype %s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str()); return; } } @@ -378,6 +386,21 @@ Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback); } +Status Session::BuildGraph(uint32_t graph_id, const std::vector &inputs) { + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || 
!instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed"); + return FAILED; + } + GELOGT(TRACE_RUNNING, "Building Graph"); + Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs); + if (ret != SUCCESS) { + GELOGE(ret, "Session BuildGraph failed"); + return FAILED; + } + return SUCCESS; +} + Status Session::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback) { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); diff --git a/src/ge/client/module.mk b/src/ge/client/module.mk new file mode 100644 index 00000000..9224a0db --- /dev/null +++ b/src/ge/client/module.mk @@ -0,0 +1,111 @@ + +LOCAL_PATH := $(call my-dir) + +COMMON_LOCAL_SRC_FILES := \ + proto/ge_api.proto \ + ge_api.cc \ + + +COMMON_LOCAL_C_INCLUDES := \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/om.proto \ + proto/insert_op.proto \ + $(LOCAL_PATH) ./ \ + $(LOCAL_PATH)/../ \ + $(LOCAL_PATH)/../../ \ + $(TOPDIR)inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/common \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/graph \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)ops/built-in/op_proto/inc \ + third_party/json/include \ + third_party/protobuf/include \ + third_party/opencv/include \ + +DEVICE_LOCAL_C_INCLUDES := \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/om.proto \ + proto/insert_op.proto \ + $(LOCAL_PATH) ./ \ + $(LOCAL_PATH)/../ \ + $(LOCAL_PATH)/../../ \ + $(TOPDIR)inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/common \ + $(TOPDIR)inc/graph \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)ops/built-in/op_proto/inc \ + third_party/json/include \ + third_party/protobuf/include \ + third_party/opencv/include \ + +#compiler for host infer +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_client +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 +ifeq ($(DEBUG), 1) 
+LOCAL_CFLAGS += -g -O0 +endif + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + libmmpa \ + libgraph \ + libregister \ + libge_compiler \ + libge_common \ + + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_SHARED_LIBRARIES += \ + libruntime \ + +include $(BUILD_HOST_SHARED_LIBRARY) + +#compiler for device +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_client +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 +LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -DREUSE_MEMORY=1 +LOCAL_MODULE_CLASS := SHARED_LIBRARIES + +LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + libmmpa \ + libgraph \ + libregister \ + libruntime \ + libge_compiler \ + libge_common \ + + +LOCAL_LDFLAGS := -lrt -ldl +LOCAL_CFLAGS += \ + -Wall + +include $(BUILD_SHARED_LIBRARY) diff --git a/src/ge/common/auth/file_saver.cc b/src/ge/common/auth/file_saver.cc index 1dc42fad..4aaf9c19 100644 --- a/src/ge/common/auth/file_saver.cc +++ b/src/ge/common/auth/file_saver.cc @@ -40,9 +40,8 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) { } char real_path[PATH_MAX] = {0}; - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_path.length() >= PATH_MAX, return FAILED, "File path is longer than PATH_MAX!"); GE_IF_BOOL_EXEC(realpath(file_path.c_str(), real_path) == nullptr, - GELOGI("File %s is not exit, it will be created.", file_path.c_str())); + GELOGI("File %s is not exist, it will be created.", file_path.c_str())); // Open file mode_t mode = S_IRUSR | S_IWUSR; fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); diff --git a/src/ge/common/ge/plugin_manager.cc b/src/ge/common/ge/plugin_manager.cc index 458b584d..c56b2a2a 100644 --- a/src/ge/common/ge/plugin_manager.cc +++ 
b/src/ge/common/ge/plugin_manager.cc @@ -50,13 +50,13 @@ PluginManager::~PluginManager() { ClearHandles_(); } string PluginManager::GetPath() { Dl_info dl_info; if (dladdr(reinterpret_cast(&PluginManager::GetPath), &dl_info) == 0) { - GELOGW("Failed to read so_path!"); + GELOGW("Failed to read the shared library file path!"); return string(); } else { std::string so_path = dl_info.dli_fname; char path[PATH_MAX] = {0}; if (so_path.length() >= PATH_MAX) { - GELOGW("File path is too long!"); + GELOGW("The shared library file path is too long!"); return string(); } if (realpath(so_path.c_str(), path) == nullptr) { @@ -93,11 +93,15 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec std::vector path_vec; SplitPath(path, path_vec); for (const auto &single_path : path_vec) { - GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID, "File path is too long!"); + GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, + GELOGE(GE_PLGMGR_PATH_INVALID, "The shared library file path is too long!"); continue); // load break when number of loaded so reach maximum if (num_of_loaded_so >= kMaxNumOfSo) { - GELOGW("Number of loaded so reaches maximum, only the first %d are loaded!", kMaxNumOfSo); + GELOGW( + "The number of dynamic libraries loaded exceeds the kMaxNumOfSo," + " and only the first %d shared libraries will be loaded.", + kMaxNumOfSo); break; } @@ -110,11 +114,11 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec int64_t file_size = 0; if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) { - GELOGW("Failed to validate so %s", file_path_dlopen.c_str()); + GELOGW("Failed to validate the shared library: %s", file_path_dlopen.c_str()); continue; } - GELOGI("dlopen so path name: %s. 
", file_path_dlopen.c_str()); + GELOGI("dlopen the shared library path name: %s.", file_path_dlopen.c_str()); // load continue when dlopen is failed auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL); @@ -128,14 +132,14 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec for (const auto &func_name : func_check_list) { auto real_fn = (void (*)())dlsym(handle, func_name.c_str()); if (real_fn == nullptr) { - GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not exist!", func_name.c_str(), + GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), func_name.c_str()); is_valid = false; break; } } if (!is_valid) { - GE_LOGE_IF(dlclose(handle), "Failed to dlclose ret"); + GE_LOGE_IF(dlclose(handle), "Failed to dlclose."); continue; } @@ -146,13 +150,13 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec num_of_loaded_so++; } - GELOGI("load so total num %u", num_of_loaded_so); + GELOGI("The total number of shared libraries loaded: %u", num_of_loaded_so); for (auto name : so_list_) { - GELOGI("load %s successfully", name.c_str()); + GELOGI("load shared library %s successfully", name.c_str()); } if (num_of_loaded_so == 0) { - GELOGW("Failed to find any valid so in path %s!", path.c_str()); + GELOGW("No loadable shared library found in the path: %s", path.c_str()); return SUCCESS; } @@ -163,7 +167,7 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded // read file size struct stat stat_buf; if (stat(file_path.c_str(), &stat_buf) != 0) { - GELOGW("%s check fail.", file_path.c_str()); + GELOGW("The shared library file check failed: %s", file_path.c_str()); return FAILED; } @@ -178,8 +182,8 @@ Status PluginManager::ValidateSo(const string &file_path, int64_t size_of_loaded // load continue if the total size of so reaches maximum when it is loaded if (size_of_loaded_so + file_size > kMaxSizeOfLoadedSo) { GELOGW( - "%s is 
skipped because the size of loaded so reaches maximum if it is load! " - "(size: %ldB, size of loaded so: %ldB, maximum: %dB)", + "%s is skipped because the size of loaded share library reaches maximum if it is loaded! " + "(size: %ldB, size of loaded share library: %ldB, maximum: %dB)", file_path.c_str(), file_size, size_of_loaded_so, kMaxSizeOfLoadedSo); return FAILED; } @@ -227,7 +231,10 @@ Status PluginManager::Load(const string &path, const vector &func_check_ // load break when number of loaded so reach maximum if (num_of_loaded_so >= kMaxNumOfSo) { - GELOGW("Number of loaded so reaches maximum, only the first %d are loaded!", kMaxNumOfSo); + GELOGW( + "The number of dynamic libraries loaded exceeds the kMaxNumOfSo," + " and only the first %d shared libraries will be loaded.", + kMaxNumOfSo); break; } @@ -240,7 +247,7 @@ Status PluginManager::Load(const string &path, const vector &func_check_ int64_t file_size = 0; if (ValidateSo(file_path_dlopen, size_of_loaded_so, file_size) != SUCCESS) { - GELOGW("Failed to validate so %s", canonical_path_str.c_str()); + GELOGW("Failed to validate the shared library: %s", canonical_path_str.c_str()); continue; } @@ -266,8 +273,7 @@ Status PluginManager::Load(const string &path, const vector &func_check_ } } if (!is_valid) { - GE_LOGE_IF(dlclose(handle), "Dlclose ret fail"); - GELOGW("Dlclose ret fail!"); + GE_LOGE_IF(dlclose(handle), "Failed to dlclose."); continue; } @@ -279,7 +285,7 @@ Status PluginManager::Load(const string &path, const vector &func_check_ } closedir(dir); if (num_of_loaded_so == 0) { - GELOGW("Failed to find any valid so under %s!", path.c_str()); + GELOGW("No loadable shared library found in the path: %s", path.c_str()); return SUCCESS; } diff --git a/src/ge/common/ge/tbe_plugin_manager.cc b/src/ge/common/ge/tbe_plugin_manager.cc new file mode 100644 index 00000000..cdce243c --- /dev/null +++ b/src/ge/common/ge/tbe_plugin_manager.cc @@ -0,0 +1,293 @@ +/** + * Copyright 2019-2020 Huawei Technologies 
Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "common/ge/tbe_plugin_manager.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/ge/ge_util.h" +#include "framework/common/debug/log.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" +#include "framework/common/ge_inner_error_codes.h" +#include "framework/engine/dnnengine.h" +#include "framework/omg/omg_inner_types.h" +#include "external/ge/ge_api_types.h" +#include "register/op_registry.h" +#include "graph/opsproto_manager.h" +#include "graph/utils/type_utils.h" + +namespace ge { +std::map TBEPluginManager::options_ = {}; + +// Get Singleton Instance +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY TBEPluginManager &TBEPluginManager::Instance() { + static TBEPluginManager instance_ptr_; + return instance_ptr_; +} + +Status TBEPluginManager::ClearHandles_() { + Status ret = SUCCESS; + for (const auto &handle : handles_vec_) { + if (dlclose(handle) != 0) { + ret = FAILED; + GELOGW("Failed to close handle: %s", dlerror()); + } + } + handles_vec_.clear(); + return ret; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status TBEPluginManager::Finalize() { + Status ret = ClearHandles_(); + return ret; +} + +string TBEPluginManager::GetPath() { + Dl_info dl_info; + if (dladdr(reinterpret_cast(&TBEPluginManager::GetPath), &dl_info) == 0) { + GELOGW("Failed to read so path!"); + return 
string(); + } else { + string so_path = dl_info.dli_fname; + char path[PATH_MAX] = {0}; + if (so_path.length() >= PATH_MAX) { + GELOGW("File path is too long!"); + return string(); + } + if (realpath(so_path.c_str(), path) == nullptr) { + GELOGW("Failed to get realpath of %s", so_path.c_str()); + return string(); + } + + so_path = path; + so_path = so_path.substr(0, so_path.rfind('/') + 1); + return so_path; + } +} + +void TBEPluginManager::ProcessSoFullName(vector &file_list, string &caffe_parser_path, string &full_name, + const string &caffe_parser_so_suff, const string &aicpu_so_suff, + const string &aicpu_host_so_suff) { + if (full_name.size() >= caffe_parser_so_suff.size() && + full_name.compare(full_name.size() - caffe_parser_so_suff.size(), caffe_parser_so_suff.size(), + caffe_parser_so_suff) == 0) { + caffe_parser_path = full_name; + } else if ((full_name.size() >= aicpu_so_suff.size() && + full_name.compare(full_name.size() - aicpu_so_suff.size(), aicpu_so_suff.size(), aicpu_so_suff) == 0) || + (full_name.size() >= aicpu_host_so_suff.size() && + full_name.compare(full_name.size() - aicpu_host_so_suff.size(), aicpu_host_so_suff.size(), + aicpu_host_so_suff) == 0)) { + // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage. 
+ domi::GetContext().aicpu_op_run_paths.push_back(full_name); + } else { + // Save parser so path into file_list vector + file_list.push_back(full_name); + } +} + +void TBEPluginManager::FindParserSo(const string &path, vector &file_list, string &caffe_parser_path) { + // Path, change to absolute path + string real_path = RealPath(path.c_str()); + // Plugin path does not exist + if (real_path.empty()) { + GELOGW("RealPath is empty."); + return; + } + struct stat stat_buf; + if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) { + GELOGW("%s is not a dir.", real_path.c_str()); + return; + } + struct dirent *dent(0); + DIR *dir = opendir(real_path.c_str()); + // Plugin path does not exist + if (dir == nullptr) { + GELOGW("Open directory %s failed.", real_path.c_str()); + return; + } + + while ((dent = readdir(dir)) != nullptr) { + if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue; + string name = dent->d_name; + string full_name = real_path + "/" + name; + const string so_suff = ".so"; + const string caffe_parser_so_suff = "lib_caffe_parser.so"; + const string aicpu_so_suff = "_aicpu.so"; + const string aicpu_host_so_suff = "_online.so"; + if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { + ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff, + aicpu_host_so_suff); + } else { + FindParserSo(full_name, file_list, caffe_parser_path); + } + } + closedir(dir); +} + +void TBEPluginManager::GetPluginSoFileList(const string &path, vector &file_list, string &caffe_parser_path) { + // Support to split multiple so directories by ":" + vector v_path = StringUtils::Split(path, ':'); + for (size_t i = 0; i < v_path.size(); ++i) { + FindParserSo(v_path[i], file_list, caffe_parser_path); + GELOGI("CustomOpLib full name = %s", v_path[i].c_str()); + } +} + +void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { 
+ GELOGI("Enter get custom op path schedule"); + std::string fmk_type; + domi::FrameworkType type = domi::TENSORFLOW; + auto it = options_.find(FRAMEWORK_TYPE); + if (it != options_.end()) { + type = static_cast(std::strtol(it->second.c_str(), nullptr, 10)); + } + fmk_type = ge::TypeUtils::FmkTypeToSerialString(type); + GELOGI("Framework type is %s.", fmk_type.c_str()); + + const char *path_env = std::getenv("ASCEND_OPP_PATH"); + if (path_env != nullptr) { + std::string path = path_env; + customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type); + GELOGI("Get custom so path from env : %s", path_env); + return; + } + std::string path_base = GetPath(); + GELOGI("path_base is %s", path_base.c_str()); + path_base = path_base.substr(0, path_base.rfind('/')); + path_base = path_base.substr(0, path_base.rfind('/') + 1); + customop_path = (path_base + "ops/framework/custom" + "/:") + (path_base + "ops/framework/built-in/" + fmk_type); + return; +} + +void TBEPluginManager::LoadCustomOpLib() { + LoadPluginSo(); + + std::vector registration_datas = domi::OpRegistry::Instance()->registrationDatas; + GELOGI("The size of registration_datas is: %zu", registration_datas.size()); + for (OpRegistrationData reg_data : registration_datas) { + bool ret = CheckRegisterStatus(reg_data); + if (ret) { + GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(), + static_cast(reg_data.GetImplyType())); + domi::OpRegistry::Instance()->Register(reg_data); + } + } +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPluginSo() { + vector file_list; + string caffe_parser_path; + std::string plugin_path; + GetCustomOpPath(plugin_path); + + // Whether there are files in the plugin so path + GetPluginSoFileList(plugin_path, file_list, caffe_parser_path); + + // No file + if (file_list.empty()) { + // Print log + GELOGW("Can not find any plugin file in plugin_path: %s", plugin_path.c_str()); + } + + 
GELOGW("The shared library will not be checked. Please ensure that the source of the shared library is trusted."); + + // Load other so files except lib_caffe_parser.so in the plugin so path + for (auto elem : file_list) { + StringUtils::Trim(elem); + + void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE); + if (handle == nullptr) { + GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), dlerror()); + } else if (find(handles_vec_.begin(), handles_vec_.end(), handle) == handles_vec_.end()) { + // Close dl when the program exist, not close here + GELOGI("Plugin load %s success.", elem.c_str()); + handles_vec_.push_back(handle); + } else { + GELOGI("Plugin so has already been loaded, no need to load again."); + } + } +} + +bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData &reg_data) { + bool ret = true; + static char *parser_priority = std::getenv("PARSER_PRIORITY"); + static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce"; + auto ori_optype_set = reg_data.GetOriginOpTypeSet(); + for (const auto &op_type : ori_optype_set) { + domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type); + GELOGD("Enter into reg_data loop. 
op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str()); + if (imply_type != domi::ImplyType::BUILDIN) { + if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) || + (!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) { + GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(), + reg_data.GetOmOptype().c_str()); + ret = false; + } else { + GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str()); + } + } else { + GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(), + reg_data.GetOmOptype().c_str(), static_cast(reg_data.GetImplyType())); + } + } + return ret; +} + +Status TBEPluginManager::CheckCustomAiCpuOpLib() { + std::vector vec_op_type; + + domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM); + for (size_t i = 0; i < vec_op_type.size(); i++) { + bool aicpu_so_exist = false; + std::string ai_cpu_so_name = "lib" + vec_op_type[i] + "_aicpu.so"; + for (size_t j = 0; j < domi::GetContext().aicpu_op_run_paths.size(); j++) { + string bin_file_path = domi::GetContext().aicpu_op_run_paths[j]; + if (bin_file_path.size() >= ai_cpu_so_name.size() && + bin_file_path.compare(bin_file_path.size() - ai_cpu_so_name.size(), ai_cpu_so_name.size(), ai_cpu_so_name) == + 0) { + aicpu_so_exist = true; + break; + } + } + if (!aicpu_so_exist) { + GELOGE(FAILED, "Can't find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str()); + return FAILED; + } + } + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::InitPreparation( + const std::map &options) { + options_.insert(options.begin(), options.end()); + // Load TBE plugin + TBEPluginManager::Instance().LoadCustomOpLib(); + Status ret = CheckCustomAiCpuOpLib(); + if (ret != SUCCESS) { + GELOGE(ret, "Check custom aicpu run so failed!"); + return; + } +} +} // 
namespace ge diff --git a/src/ge/common/ge/tbe_plugin_manager.h b/src/ge/common/ge/tbe_plugin_manager.h new file mode 100644 index 00000000..c2ad99b1 --- /dev/null +++ b/src/ge/common/ge/tbe_plugin_manager.h @@ -0,0 +1,73 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ +#define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "external/ge/ge_api_error_codes.h" +#include "external/register/register.h" + +namespace ge { +using SoHandlesVec = std::vector; +using std::function; +using std::map; +using std::string; +using std::vector; + +class TBEPluginManager { + public: + Status Finalize(); + + // Get TBEPluginManager singleton instance + static TBEPluginManager &Instance(); + + static string GetPath(); + + static void InitPreparation(const std::map &options); + + void LoadPluginSo(); + + private: + TBEPluginManager() = default; + ~TBEPluginManager() = default; + Status ClearHandles_(); + + static void ProcessSoFullName(vector &file_list, string &caffe_parser_path, string &full_name, + const string &caffe_parser_so_suff, const string &aicpu_so_suff, + const string &aicpu_host_so_suff); + static void FindParserSo(const string &path, vector &file_list, string &caffe_parser_path); + static void GetPluginSoFileList(const string &path, vector &file_list, string 
&caffe_parser_path); + static void GetCustomOpPath(std::string &customop_path); + void LoadCustomOpLib(); + static Status CheckCustomAiCpuOpLib(); + static bool CheckRegisterStatus(const OpRegistrationData &reg_data); + + SoHandlesVec handles_vec_; + static std::map options_; +}; +} // namespace ge + +#endif // GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ diff --git a/src/ge/common/ge_common.mk b/src/ge/common/ge_common.mk new file mode 100644 index 00000000..e913c8f5 --- /dev/null +++ b/src/ge/common/ge_common.mk @@ -0,0 +1,241 @@ +LOCAL_PATH := $(call my-dir) + +GE_COMMON_LOCAL_SRC_FILES := \ + context/ctx.cc \ + model_saver.cc \ + ge/datatype_util.cc \ + helper/om_file_helper.cc \ + helper/model_helper.cc \ + ../model/ge_model.cc \ + auth/file_saver.cc \ + fp16_t.cc \ + math/fp16_math.cc \ + debug/memory_dumper.cc \ + formats/utils/formats_trans_utils.cc \ + formats/format_transfers/datatype_transfer.cc \ + formats/format_transfers/format_transfer_transpose.cc \ + formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ + formats/format_transfers/format_transfer_fractal_z.cc \ + formats/format_transfers/format_transfer_fractal_nz.cc \ + formats/format_transfers/format_transfer_fractal_zz.cc \ + formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \ + formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \ + formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \ + formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \ + formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \ + formats/format_transfers/format_transfer_fracz_nchw.cc \ + formats/format_transfers/format_transfer_fracz_nhwc.cc \ + formats/format_transfers/format_transfer_fracz_hwcn.cc \ + formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \ + formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \ + formats/format_transfers/format_transfer_nchw_fz_c04.cc \ + formats/formats.cc \ + ge_format_util.cc \ + fmk_error_codes.cc \ + util.cc \ + properties_manager.cc \ + 
types.cc\ + model_parser/base.cc \ + tbe_kernel_store.cc \ + op/attr_value_util.cc \ + op/ge_op_utils.cc \ + thread_pool.cc \ + ge/tbe_plugin_manager.cc \ + +GE_COMMON_LOCAL_C_INCLUDES := \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + proto/tensorflow/graph.proto \ + proto/tensorflow/node_def.proto \ + proto/tensorflow/function.proto \ + proto/tensorflow/versions.proto \ + proto/tensorflow/attr_value.proto \ + proto/tensorflow/tensor.proto \ + proto/tensorflow/tensor_shape.proto \ + proto/tensorflow/op_def.proto \ + proto/tensorflow/types.proto \ + proto/tensorflow/resource_handle.proto \ + $(TOPDIR)inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/common/util \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)third_party/json/include \ + $(TOPDIR)third_party/protobuf/include \ + $(TOPDIR)third_party/openssl/include/x86/include \ + $(TOPDIR)framework/domi \ + $(TOPDIR)framework/domi/common \ + $(TOPDIR)framework/domi/common/op + +#compile host libge_common +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_common + +LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +ifeq ($(DEBUG), 1) + LOCAL_CFLAGS += -g -O0 +else + LOCAL_CFLAGS += -fvisibility=hidden -DHOST_VISIBILITY +endif +ifeq ($(host_os), euleros) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(host_os), centos) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(TARGET_OS), euleros) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(TARGET_OS), centos) + LOCAL_CFLAGS += -DOS_CENTOS +endif + +LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES) +LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES) + +LOCAL_SHARED_LIBRARIES := \ + libprotobuf \ + libc_sec \ + libslog \ + libmmpa \ + libgraph \ + libregister \ + liberror_manager \ + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_SHARED_LIBRARY) + +#compile device libge_common +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_common + 
+LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +ifeq ($(DEBUG), 1) + LOCAL_CFLAGS += -g -O0 +else + LOCAL_CFLAGS += -fvisibility=hidden -DDEV_VISIBILITY +endif +ifeq ($(host_os), euleros) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(host_os), centos) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(TARGET_OS), euleros) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(TARGET_OS), centos) + LOCAL_CFLAGS += -DOS_CENTOS +endif + +LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES) +LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES) + +LOCAL_SHARED_LIBRARIES := \ + libprotobuf \ + libc_sec \ + libslog \ + libmmpa \ + libgraph \ + libregister \ + liberror_manager \ + +ifeq ($(device_os),android) +LOCAL_LDFLAGS += -ldl +LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog +else +LOCAL_LDFLAGS := -lrt -ldl +endif + +include $(BUILD_SHARED_LIBRARY) + +#compile host libge_common static lib +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_common + +LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +ifeq ($(DEBUG), 1) + LOCAL_CFLAGS += -g -O0 +endif + +ifeq ($(host_os), euleros) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(host_os), centos) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(TARGET_OS), euleros) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(TARGET_OS), centos) + LOCAL_CFLAGS += -DOS_CENTOS +endif + +LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES) +LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := \ + libgraph \ + libprotobuf \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libslog \ + libmmpa \ + libregister \ + liberror_manager \ + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_STATIC_LIBRARY) + +#compile device libge_common static_lib +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_common + +LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += 
-DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +ifeq ($(DEBUG), 1) + LOCAL_CFLAGS += -g -O0 +endif +ifeq ($(host_os), euleros) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(host_os), centos) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(TARGET_OS), euleros) + LOCAL_CFLAGS += -DOS_CENTOS +endif +ifeq ($(TARGET_OS), centos) + LOCAL_CFLAGS += -DOS_CENTOS +endif + +LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES) +LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := \ + libgraph \ + libprotobuf \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libslog \ + libmmpa \ + libregister \ + liberror_manager \ + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_STATIC_LIBRARY) diff --git a/src/ge/common/helper/model_cache_helper.cc b/src/ge/common/helper/model_cache_helper.cc index da1a212e..e9b1de83 100644 --- a/src/ge/common/helper/model_cache_helper.cc +++ b/src/ge/common/helper/model_cache_helper.cc @@ -178,7 +178,7 @@ bool ModelCacheHelper::IsModelCacheHit() const { return false; } if (!IsVarManagerSameAsCache(var_manager_json)) { - GELOGI("Graph id[%u] cache miss: the VarManager dos not match the cache info.", graph_id_); + GELOGI("Graph id[%u] cache miss: the VarManager does not match the cache info.", graph_id_); return false; } GELOGI("Graph id[%u] cache hit.", graph_id_); @@ -563,7 +563,7 @@ Status ModelCacheHelper::GetCacheInfo(CacheInfo &cache_info) const { cache_info.graph_hash = cache_json[kGraphHash]; Json nodes_hash_json = cache_json[kNodeHash]; if (!(nodes_hash_json.is_null() || nodes_hash_json.is_array())) { - GELOGW("Nodes hash in cache be null or array."); + GELOGW("Nodes hash in cache should be null or array."); return FAILED; } for (const auto &iter : nodes_hash_json) { @@ -1670,7 +1670,7 @@ Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const { ModelData model_data; ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data); if (ret != SUCCESS) { - GELOGW("LoadOmModelFromCache: Load 
model from file fialed. ret = %u", ret); + GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret); return ret; } diff --git a/src/ge/common/helper/model_helper.cc b/src/ge/common/helper/model_helper.cc index facaabdf..556b43e7 100644 --- a/src/ge/common/helper/model_helper.cc +++ b/src/ge/common/helper/model_helper.cc @@ -144,7 +144,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(), platform_version.size() + 1); if (err != EOK) { - GELOGE(MEMALLOC_FAILED, "ModelHelper SaveModel failed while while allocating memory for platform_version"); + GELOGE(MEMALLOC_FAILED, "ModelHelper SaveModel failed while allocating memory for platform_version."); return MEMALLOC_FAILED; } string version = reinterpret_cast(model_header.platform_version); diff --git a/src/ge/common/helper/om_file_helper.cc b/src/ge/common/helper/om_file_helper.cc index 917807f0..0d58fe71 100644 --- a/src/ge/common/helper/om_file_helper.cc +++ b/src/ge/common/helper/om_file_helper.cc @@ -52,7 +52,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::Init(u FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetModelPartition(ModelPartitionType type, ModelPartition &partition) { if (!is_inited_) { - GELOGE(PARAM_INVALID, "OmFileLoadHelper not Inited!"); + GELOGE(PARAM_INVALID, "OmFileLoadHelper has not been initialized!"); return PARAM_INVALID; } @@ -67,7 +67,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod if (!found) { if (type != ModelPartitionType::TBE_KERNELS) { - GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas", static_cast(type)); + GELOGE(FAILED, "GetModelPartition:type:%d is not in partition_datas!", static_cast(type)); return FAILED; } } @@ -77,7 +77,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OmFileLoadHelper::GetMod Status 
OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { // Parameter validity check if (model.model_data == nullptr) { - GELOGE(PARAM_INVALID, "Model_data must not be null"); + GELOGE(PARAM_INVALID, "Model_data must not be null!"); return PARAM_INVALID; } @@ -103,7 +103,7 @@ Status OmFileLoadHelper::CheckModelValid(const ge::ModelData &model) const { Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size) { if (model_data == nullptr) { - GELOGE(PARAM_INVALID, "Param model_data must not be null"); + GELOGE(PARAM_INVALID, "Param model_data must not be null!"); return PARAM_INVALID; } // Init partition table @@ -131,7 +131,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint context_.partition_datas_.push_back(partition); if (partition.size > model_data_size || mem_offset > model_data_size - partition.size) { - GELOGE(PARAM_INVALID, "the current need partition sizes %zu greater than the model data size %u ", + GELOGE(PARAM_INVALID, "The partition size %zu is greater than the model data size %u.", partition.size + mem_offset, model_data_size); return PARAM_INVALID; } @@ -199,7 +199,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat ModelPartitionTable *partition_table = GetPartitionTable(); if (partition_table == nullptr) { - GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile exe failed: partition_table is NULL"); + GELOGE(ge::GE_GRAPH_SAVE_FAILED, "SaveModelToFile execute failed: partition_table is NULL."); return ge::GE_GRAPH_SAVE_FAILED; } uint32_t size_of_table = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); diff --git a/src/ge/common/model_saver.cc b/src/ge/common/model_saver.cc index f68051f4..11d9e804 100644 --- a/src/ge/common/model_saver.cc +++ b/src/ge/common/model_saver.cc @@ -26,6 +26,7 @@ #include "framework/common/debug/log.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" +#include 
"common/util/error_manager/error_manager.h" namespace ge { const uint32_t kInteval = 2; @@ -41,10 +42,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi try { model_str = model.dump(kInteval, ' ', false, Json::error_handler_t::ignore); } catch (std::exception &e) { - GELOGE(FAILED, "Transfer json to string failed, reason: %s.", e.what()); + ErrorManager::GetInstance().ATCReportErrMessage("E19007", {"exception"}, {e.what()}); + GELOGE(FAILED, "Failed to convert JSON to string, reason: %s.", e.what()); return FAILED; } catch (...) { - GELOGE(FAILED, "Transfer json to string failed."); + ErrorManager::GetInstance().ATCReportErrMessage("E19008"); + GELOGE(FAILED, "Failed to convert JSON to string."); return FAILED; } @@ -57,6 +60,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi mode_t mode = S_IRUSR | S_IWUSR; int32_t fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { + ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"filepath", "errMsg"}, {file_path, strerror(errno)}); GELOGE(FAILED, "Open file failed. file path : %s, %s", file_path, strerror(errno)); return FAILED; } @@ -65,6 +69,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi // Write data to file mmSsize_t mmpa_ret = mmWrite(fd, const_cast((const void *)model_char), len); if (mmpa_ret == EN_ERROR || mmpa_ret == EN_INVALID_PARAM) { + ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"mmpa_ret", "errMsg"}, + {std::to_string(mmpa_ret), strerror(errno)}); // Need to both print the error info of mmWrite and mmClose, so return ret after mmClose GELOGE(FAILED, "Write to file failed. 
errno = %d, %s", mmpa_ret, strerror(errno)); ret = FAILED; diff --git a/src/ge/common/module.mk b/src/ge/common/module.mk new file mode 100644 index 00000000..c6971682 --- /dev/null +++ b/src/ge/common/module.mk @@ -0,0 +1,3 @@ +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/ge_common.mk diff --git a/src/ge/common/op/ge_op_utils.cc b/src/ge/common/op/ge_op_utils.cc index bba1afe8..1dc268b2 100644 --- a/src/ge/common/op/ge_op_utils.cc +++ b/src/ge/common/op/ge_op_utils.cc @@ -114,6 +114,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::AippOpParams *aipp_params) { GE_CHECK_NOTNULL(aipp_params); AIPP_CONVERT_FORMAT_EX(aipp_mode, domi::AippOpParams::AippMode, int32_t, GeAttrValue::INT); + AIPP_CONVERT_INT(related_input_rank); if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) { AIPP_CONVERT_INT(max_src_image_size); @@ -149,6 +150,7 @@ OpUtils::ConvertAippParams(const GeAttrValue::NAMED_ATTRS &aipp_attr, domi::Aipp AIPP_CONVERT_LIST_FLOAT(var_reci_chn_0, true); AIPP_CONVERT_LIST_FLOAT(var_reci_chn_1, true); AIPP_CONVERT_LIST_FLOAT(var_reci_chn_2, true); + AIPP_CONVERT_LIST_FLOAT(var_reci_chn_3, true); const bool csc_switch = aipp_params->csc_switch(); AIPP_CONVERT_LIST_INT(matrix_r0c0, csc_switch); diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc index 8a29f0b4..748b9880 100644 --- a/src/ge/common/profiling/profiling_manager.cc +++ b/src/ge/common/profiling/profiling_manager.cc @@ -478,24 +478,32 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportProfilingData( const std::vector &task_desc_info, const std::vector &compute_graph_desc_info) { #ifdef DAVINCI_SUPPORT_PROFILING - int32_t device_id = 0; - rtError_t rt_ret = rtGetDevice(&device_id); + int32_t logic_device_id = 0; + rtError_t rt_ret = 
rtGetDevice(&logic_device_id); if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "runtime get device_id failed, current device_id:%d", device_id); + GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); return; } - GELOGI("current device_id:%d", device_id); + GELOGI("current logic_device_id:%d", logic_device_id); - auto ret = std::find(device_id_.begin(), device_id_.end(), device_id); + uint32_t phy_device_id = 0; + rt_ret = rtGetDevicePhyIdByIndex((uint32_t)logic_device_id, &phy_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); + return; + } + GELOGI("current phy_device_id:%d", phy_device_id); + + auto ret = std::find(device_id_.begin(), device_id_.end(), phy_device_id); if (ret == device_id_.end()) { - GELOGE(FAILED, "get valid device_id failed, profiling report failed."); + GELOGE(FAILED, "get valid phy_device_id failed, profiling report failed."); return; } GELOGI("start ProfilingTaskDescInfo."); - ProfilingTaskDescInfo(task_desc_info, device_id); + ProfilingTaskDescInfo(task_desc_info, phy_device_id); GELOGI("start ProfilingGraphDescInfo."); - ProfilingGraphDescInfo(compute_graph_desc_info, device_id); + ProfilingGraphDescInfo(compute_graph_desc_info, phy_device_id); GELOGI("Report profiling data for GE end."); #endif } diff --git a/src/ge/common/types.cc b/src/ge/common/types.cc index 751e36b7..97761dea 100644 --- a/src/ge/common/types.cc +++ b/src/ge/common/types.cc @@ -116,6 +116,7 @@ REGISTER_OPTYPE_DEFINE(SLICE, "Slice"); REGISTER_OPTYPE_DEFINE(SLICED, "SliceD"); REGISTER_OPTYPE_DEFINE(FLOORDIV, "FloorDiv"); REGISTER_OPTYPE_DEFINE(SQUEEZE, "Squeeze"); +REGISTER_OPTYPE_DEFINE(UNSQUEEZE, "Unsqueeze"); REGISTER_OPTYPE_DEFINE(STRIDEDSLICE, "StridedSlice"); REGISTER_OPTYPE_DEFINE(RANGE, "Range"); REGISTER_OPTYPE_DEFINE(RPNPROPOSALS, "RpnProposals"); diff --git a/src/ge/common/util.cc b/src/ge/common/util.cc index 
0a6561c4..50ed2f33 100644 --- a/src/ge/common/util.cc +++ b/src/ge/common/util.cc @@ -67,9 +67,8 @@ static bool ReadProtoFromCodedInputStream(CodedInputStream &coded_stream, Messag } FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromBinaryFile(const char *file, Message *proto) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file == nullptr || proto == nullptr), - ErrorManager::GetInstance().ATCReportErrMessage("E19001"); - return false, "Input parameter file or proto is nullptr!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file == nullptr || proto == nullptr), return false, + "Input parameter file or proto is nullptr!"); std::string real_path = RealPath(file); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(real_path.empty(), return false, "pb file path '%s' not valid", file); @@ -119,8 +118,9 @@ long GetFileLength(const std::string &input_file) { ErrorManager::GetInstance().ATCReportErrMessage("E10037", {"filepath"}, {input_file}); return -1, "Open file[%s] failed", input_file.c_str()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), ErrorManager::GetInstance().ATCReportErrMessage("E10038"); - return -1, "File[%s] length is 0, not valid.", input_file.c_str()); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((file_length == 0), + ErrorManager::GetInstance().ATCReportErrMessage("E10038", {"filepath"}, {input_file}); + return -1, "File[%s] size is 0, not valid.", input_file.c_str()); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( file_length > kMaxFileSizeLimit, ErrorManager::GetInstance().ATCReportErrMessage( @@ -207,7 +207,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std:: if (dir_path_len >= PATH_MAX) { ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {directory_path, std::to_string(PATH_MAX)}); - GELOGW("Path[%s] len is too long, it must smaller than %d", directory_path.c_str(), PATH_MAX); + GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), PATH_MAX); return -1; } char tmp_dir_path[PATH_MAX] = {0}; @@ -338,14 
+338,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( strlen(path) >= PATH_MAX, ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); - return "", "Path[%s] len is too long, it must smaller than %d", path, PATH_MAX); + return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX); // PATH_MAX is the system's own macro, indicating the maximum file path length supported std::shared_ptr resolved_path(new (std::nothrow) char[PATH_MAX](), std::default_delete()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( - resolved_path == nullptr, - ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); - return "", "Path[%s] new string object len[%d] failed.", path, PATH_MAX); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(resolved_path == nullptr, return "", "Path[%s] new string object len[%d] failed.", + path, PATH_MAX); // Nullptr is returned when the path does not exist or there is no permission // Return absolute path when path is accessible @@ -384,7 +382,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const !ValidateStr(real_path, mode), ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path}); return false, - "Input parameter's value[%s] is illegal. The path[%s] can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' " + "Input parameter[--%s]'s value[%s] is illegal. The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' " "and chinese character.", atc_param.c_str(), real_path.c_str()); @@ -420,7 +418,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const !ValidateStr(real_path, mode), ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "path"}, {atc_param, real_path}); return false, - "Input parameter's value[%s] is illegal. 
The path[%s] can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' " + "Input parameter[--%s]'s value[%s] is illegal. The path can only contains 'a-z' 'A-Z' '0-9' '-' '.' '_' " "and chinese character.", atc_param.c_str(), real_path.c_str()); diff --git a/src/ge/engine_manager/dnnengine_manager.cc b/src/ge/engine_manager/dnnengine_manager.cc index 1eb38489..c8843c09 100644 --- a/src/ge/engine_manager/dnnengine_manager.cc +++ b/src/ge/engine_manager/dnnengine_manager.cc @@ -75,7 +75,7 @@ Status DNNEngineManager::Initialize(const std::map &op return status; } - GELOGI("The number of DNNEngineObjs are %zu.", engines_map_.size()); + GELOGI("The number of DNNEngineObjs is %zu.", engines_map_.size()); // Engines initialize for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) { @@ -373,7 +373,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno)); return FAILED; } else { - GELOGW("The json file %s is not need", file_path.c_str()); + GELOGW("The json file %s is not needed.", file_path.c_str()); return SUCCESS; } } diff --git a/src/ge/executor/CMakeLists.txt b/src/ge/executor/CMakeLists.txt index 90b091d2..cddf25b7 100755 --- a/src/ge/executor/CMakeLists.txt +++ b/src/ge/executor/CMakeLists.txt @@ -30,6 +30,7 @@ file(GLOB SRC_LIST RELATIVE ${CMAKE_CURRENT_LIST_DIR} "../common/profiling/profiling_manager.cc" "../graph/execute/graph_execute.cc" "../graph/load/graph_loader.cc" + "../graph/load/new_model_manager/aipp_utils.cc" "../graph/load/new_model_manager/cpu_queue_schedule.cc" "../graph/load/new_model_manager/data_dumper.cc" "../graph/load/new_model_manager/data_inputer.cc" diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc index 92529598..210eecd6 100644 --- a/src/ge/executor/ge_executor.cc +++ b/src/ge/executor/ge_executor.cc @@ -38,6 +38,7 @@ namespace { const size_t kDynamicBatchSizeVecSize = 1; +const size_t 
kStaticBatchInfoSize = 1; const size_t kDynamicImageSizeVecSize = 2; const size_t kDynamicImageSizeInputSize = 2; const char *const kBatchLabel = "Batch_"; @@ -180,16 +181,16 @@ class ModelListenerAdapter : public ModelListener { GeExecutor::GeExecutor() {} Status GeExecutor::Initialize() { - GELOGI("Init ge_executor begin."); + GELOGI("Init GeExecutor begin."); if (isInit_) { - GELOGW("Already inited, don't need to init again."); + GELOGW("Already initialized, no need to be initialized again."); return ge::SUCCESS; } std::vector mem_type(1, RT_MEMORY_HBM); auto ret = MemManager::Instance().Initialize(mem_type); if (ret != SUCCESS) { - GELOGE(ret, "Memory Manager init fail."); + GELOGE(ret, "Memory Manager init failed."); return ret; } @@ -200,14 +201,14 @@ Status GeExecutor::Initialize() { ProfilingManager::Instance().Init(profiling_options); isInit_ = true; - GELOGI("Init ge_executor over."); + GELOGI("Init GeExecutor over."); return ge::SUCCESS; } Status GeExecutor::Finalize() { - GELOGI("Uninit ge_executor begin."); + GELOGI("Uninit GeExecutor begin."); if (isInit_ == false) { - GELOGW("ge_executor needs to init begin."); + GELOGW("GeExecutor has not been initialized."); return ge::SUCCESS; } @@ -217,7 +218,7 @@ Status GeExecutor::Finalize() { ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); } - GELOGI("Uninit ge_executor over."); + GELOGI("Uninit GeExecutor over."); return ge::SUCCESS; } @@ -236,6 +237,7 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad // Verify whether the input dynamic batch matches the model gear std::vector> batch_info; + std::vector batch_num{batch_size}; Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); if (ret != SUCCESS) { GELOGE(FAILED, "Get dynamic input info failed."); @@ -247,6 +249,11 @@ Status GeExecutor::SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_ad return FAILED; } + ret = GraphExecutor::SetDynamicSize(model_id, batch_num); + if (ret != 
SUCCESS) { + GELOGE(FAILED, "Set dynamic size failed"); + return FAILED; + } // memcpy dynamic_batch_size from host to device if (rtMemcpy(dynamic_input_addr, length, &batch_size, size, RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { GELOGE(FAILED, "memcpy dynamic batch input data failed!"); @@ -270,6 +277,7 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad // Verify whether the input dynamic resolution matches the model gear std::vector> batch_info; + std::vector batch_num{image_height, image_width}; Status ret = GraphExecutor::GetDynamicBatchInfo(model_id, batch_info); if (ret != SUCCESS) { GELOGE(FAILED, "Get dynamic input info failed."); @@ -281,6 +289,11 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad return FAILED; } + ret = GraphExecutor::SetDynamicSize(model_id, batch_num); + if (ret != SUCCESS) { + GELOGE(FAILED, "Set dynamic size failed"); + return FAILED; + } // Memcpy dynamic resolution height from host to device if (rtMemcpy(dynamic_input_addr, sizeof(uint64_t), &image_height, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE) != RT_ERROR_NONE) { @@ -298,6 +311,20 @@ Status GeExecutor::SetDynamicImageSize(uint32_t model_id, void *dynamic_input_ad return SUCCESS; } +Status GeExecutor::GetCurShape(const uint32_t model_id, std::vector &batch_info) { + GELOGI("Begin to get current shape"); + if (!isInit_) { + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); + return GE_EXEC_NOT_INIT; + } + Status ret = GraphExecutor::GetCurShape(model_id, batch_info); + if (ret != SUCCESS) { + GELOGE(FAILED, "Get current shape failed"); + return FAILED; + } + return SUCCESS; +} + Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, const std::vector &aippBatchPara, const kAippDynamicPara &aippParms) { @@ -346,13 +373,13 @@ Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, int32_t priority, std::shared_ptr listener) { 
GELOGI("load model offline begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } string filePath = RealPath(path.c_str()); if (filePath.empty()) { - GELOGE(ge::FAILED, "fileath is invalid. please check your text file '%s'.", path.c_str()); + GELOGE(ge::FAILED, "File path is invalid. please check your text file '%s'.", path.c_str()); return ge::FAILED; } @@ -375,7 +402,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, std::shared_ptr listener) { GELOGI("Load model begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } @@ -397,7 +424,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, Status GeExecutor::UnloadModel(uint32_t model_id) { GELOGI("unload model %u begin.", model_id); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id); @@ -411,7 +438,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) { GELOGI("run model begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } @@ -428,7 +455,7 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector &output_desc) { GELOGI("get model desc info begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } @@ -436,12 +463,11 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector output_desc_infos; std::vector input_formats; 
std::vector output_formats; - GELOGI("GetInputOutputDescInfo via new ome."); Status ret = GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats); if (ret != domi::SUCCESS) { - GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); + GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret); return TransferDomiErrorCode(ret); } @@ -473,7 +499,7 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector> &batch_info) { GELOGI("Begin to get dynamic batch info."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } @@ -487,11 +513,49 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector &dynamic_output_shape_info) { + GELOGI("Begin to get dynamic batch output shape info"); + if (!isInit_) { + GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + return GE_EXEC_NOT_INIT; + } + Status ret = GraphExecutor::GetModelAttr(model_id, dynamic_output_shape_info); + if (ret != SUCCESS) { + GELOGE(ret, "Get dynamic batch output shape info failed."); + return ret; + } + + GELOGI("Get dynamic batch output shape info succ."); + return SUCCESS; +} + Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, std::vector &output_desc) { GELOGI("get model desc info for zero copy begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } @@ -499,12 +563,11 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector output_desc_infos; std::vector input_formats; std::vector output_formats; - GELOGI("GetInputOutputDescInfoForZeroCopy via new ome."); Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats); if (ret != domi::SUCCESS) { - GELOGE(ret, "Get 
DescInfo For ZeroCopy failed. ret = %u", ret); + GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret); return TransferDomiErrorCode(ret); } @@ -521,7 +584,7 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &output_queue_ids) { GELOGI("Load model with queue begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } return GraphLoader::LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); @@ -638,7 +701,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel ge::RunModelData &run_output_data, bool async_mode) { GELOGI("Execute model begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } @@ -674,7 +737,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel Status GeExecutor::GetMemAndWeightSize(const std::string &path, size_t &mem_size, size_t &weight_size) { GELOGI("Get memory and weight size from file begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } @@ -707,7 +770,7 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size size_t &weight_size) { GELOGI("Get memory and weight size from data begin."); if (!isInit_) { - GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); return GE_EXEC_NOT_INIT; } @@ -741,4 +804,56 @@ Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector> batch_info; + Status ret = GetDynamicBatchInfo(model_id, batch_info); + if (ret != SUCCESS) { + GELOGE(ret, "Calc batch info size failed. 
ret = %d", ret); + return ret; + } + if (batch_info.empty()) { + shape_count = kStaticBatchInfoSize; + } else { + shape_count = batch_info.size(); + } + return SUCCESS; +} + +Status GeExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { + GELOGI("Begin to GetOrigInputInfo."); + if (!isInit_) { + GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + return GE_EXEC_NOT_INIT; + } + + Status ret = GraphExecutor::GetOrigInputInfo(model_id, index, orig_input_info); + if (ret != SUCCESS) { + GELOGE(ret, "GetOrigInputInfo failed."); + return ret; + } + + GELOGI("GetOrigInputInfo succ."); + return SUCCESS; +} + +Status GeExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, + std::vector &input_dims, + std::vector &output_dims) { + GELOGI("Begin to GetAllAippInputOutputDims."); + if (!isInit_) { + GELOGE(GE_EXEC_NOT_INIT, "not inited yet!"); + return GE_EXEC_NOT_INIT; + } + + Status ret = GraphExecutor::GetAllAippInputOutputDims(model_id, index, input_dims, output_dims); + if (ret != SUCCESS) { + GELOGE(ret, "GetAllAippInputOutputDims failed."); + return ret; + } + + GELOGI("GetAllAippInputOutputDims succ."); + return SUCCESS; +} + } // namespace ge diff --git a/src/ge/executor/module.mk b/src/ge/executor/module.mk new file mode 100644 index 00000000..efed8854 --- /dev/null +++ b/src/ge/executor/module.mk @@ -0,0 +1,202 @@ +LOCAL_PATH := $(call my-dir) + +local_ge_executor_src_files := \ + ge_executor.cc \ + ../common/profiling/profiling_manager.cc \ + ../common/ge/plugin_manager.cc \ + ../graph/load/graph_loader.cc \ + ../graph/execute/graph_execute.cc \ + ../omm/csa_interact.cc \ + ../graph/manager/graph_manager_utils.cc \ + ../graph/manager/graph_var_manager.cc \ + ../graph/manager/graph_mem_allocator.cc \ + ../graph/manager/graph_caching_allocator.cc \ + ../graph/manager/trans_var_data_utils.cc \ + ../graph/manager/util/debug.cc \ + ../model/ge_model.cc \ + ../model/ge_root_model.cc \ + 
../graph/load/new_model_manager/davinci_model.cc \ + ../graph/load/new_model_manager/davinci_model_parser.cc \ + ../graph/load/new_model_manager/model_manager.cc \ + ../graph/load/new_model_manager/tbe_handle_store.cc \ + ../graph/load/new_model_manager/cpu_queue_schedule.cc \ + ../graph/load/new_model_manager/model_utils.cc \ + ../graph/load/new_model_manager/aipp_utils.cc \ + ../graph/load/new_model_manager/data_inputer.cc \ + ../graph/load/new_model_manager/data_dumper.cc \ + ../graph/load/new_model_manager/zero_copy_task.cc \ + ../graph/load/new_model_manager/task_info/task_info.cc \ + ../graph/load/new_model_manager/task_info/event_record_task_info.cc \ + ../graph/load/new_model_manager/task_info/event_wait_task_info.cc \ + ../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ + ../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ + ../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ + ../graph/load/new_model_manager/task_info/kernel_task_info.cc \ + ../graph/load/new_model_manager/task_info/label_set_task_info.cc \ + ../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ + ../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ + ../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ + ../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ + ../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ + ../graph/load/new_model_manager/task_info/stream_active_task_info.cc \ + ../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ + ../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ + ../graph/load/new_model_manager/task_info/end_graph_task_info.cc \ + ../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ + ../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + ../graph/load/output/output.cc \ + ../single_op/single_op_manager.cc \ + 
../single_op/single_op_model.cc \ + ../single_op/single_op.cc \ + ../single_op/stream_resource.cc \ + ../single_op/task/op_task.cc \ + ../single_op/task/build_task_utils.cc \ + ../single_op/task/tbe_task_builder.cc \ + ../single_op/task/aicpu_task_builder.cc \ + ../hybrid/hybrid_davinci_model_stub.cc\ + +local_ge_executor_c_include := \ + proto/insert_op.proto \ + proto/op_mapping_info.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/om.proto \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc \ + $(LOCAL_PATH)/../ \ + $(TOPDIR)libc_sec/include \ + third_party/protobuf/include \ + third_party/json/include \ + +local_ge_executor_shared_library := \ + libprotobuf \ + libc_sec \ + libge_common \ + libruntime \ + libslog \ + libmmpa \ + libgraph \ + libmsprof \ + +local_ge_executor_ldflags := -lrt -ldl \ + + +#compile arm device dynamic lib +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_executor +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING + +LOCAL_SRC_FILES := $(local_ge_executor_src_files) +LOCAL_C_INCLUDES := $(local_ge_executor_c_include) + +LOCAL_SHARED_LIBRARIES := $(local_ge_executor_shared_library) +ifeq ($(device_os),android) +LOCAL_LDFLAGS += -ldl +LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog +else +LOCAL_LDFLAGS += $(local_ge_executor_ldflags) +endif + +include $(BUILD_SHARED_LIBRARY) + +#compile x86 host dynamic lib +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_executor +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING +ifeq ($(DEBUG), 1) +LOCAL_CFLAGS += -g -O0 +else +LOCAL_CFLAGS += -O2 +endif + +LOCAL_SRC_FILES := $(local_ge_executor_src_files) + +LOCAL_C_INCLUDES := $(local_ge_executor_c_include) + +LOCAL_SHARED_LIBRARIES := \ + libprotobuf \ + libc_sec \ + libge_common \ + libruntime \ + libslog \ 
+ libmmpa \ + libgraph \ + libmsprof \ + +LOCAL_LDFLAGS += $(local_ge_executor_ldflags) + +include $(BUILD_HOST_SHARED_LIBRARY) + +#compile for host static lib +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_executor +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING +ifeq ($(DEBUG), 1) +LOCAL_CFLAGS += -g -O0 +else +LOCAL_CFLAGS += -O2 +endif + +LOCAL_SRC_FILES := $(local_ge_executor_src_files) + +LOCAL_C_INCLUDES := $(local_ge_executor_c_include) + +LOCAL_STATIC_LIBRARIES := \ + libge_common \ + libgraph \ + libprotobuf \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libruntime \ + libslog \ + libmmpa \ + libmsprof \ + +LOCAL_LDFLAGS += $(local_ge_executor_ldflags) + +include $(BUILD_HOST_STATIC_LIBRARY) + +#compile for device static lib +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_executor +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING +ifeq ($(DEBUG), 1) +LOCAL_CFLAGS += -g -O0 +else +LOCAL_CFLAGS += -O2 +endif + +LOCAL_SRC_FILES := $(local_ge_executor_src_files) +LOCAL_C_INCLUDES := $(local_ge_executor_c_include) + +LOCAL_STATIC_LIBRARIES := \ + libge_common \ + libgraph \ + libprotobuf \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libruntime \ + libslog \ + libmmpa \ + libmsprof \ + +ifeq ($(device_os),android) +LOCAL_LDFLAGS += -ldl +LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog +else +LOCAL_LDFLAGS += $(local_ge_executor_ldflags) +endif + +include $(BUILD_STATIC_LIBRARY) diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk new file mode 100644 index 00000000..e12989c0 --- /dev/null +++ b/src/ge/ge_inference.mk @@ -0,0 +1,407 @@ +LOCAL_PATH := $(call my-dir) + +COMMON_LOCAL_SRC_FILES := \ + proto/fusion_model.proto \ + proto/optimizer_priority.proto \ + graph/manager/trans_var_data_utils.cc \ + omm/csa_interact.cc \ + common/fp16_t.cc \ + 
common/formats/utils/formats_trans_utils.cc \ + common/formats/format_transfers/datatype_transfer.cc \ + common/formats/format_transfers/format_transfer_transpose.cc \ + common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ + common/formats/format_transfers/format_transfer_fractal_z.cc \ + common/formats/format_transfers/format_transfer_fractal_nz.cc \ + common/formats/format_transfers/format_transfer_fractal_zz.cc \ + common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \ + common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \ + common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \ + common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \ + common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \ + common/formats/format_transfers/format_transfer_fracz_nchw.cc \ + common/formats/format_transfers/format_transfer_fracz_nhwc.cc \ + common/formats/format_transfers/format_transfer_fracz_hwcn.cc \ + common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \ + common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \ + common/formats/format_transfers/format_transfer_nchw_fz_c04.cc \ + common/formats/formats.cc \ + common/profiling/profiling_manager.cc \ + common/helper/model_cache_helper.cc \ + ge_local_engine/engine/host_cpu_engine.cc \ + + +GRAPH_MANAGER_LOCAL_SRC_FILES := \ + common/ge/plugin_manager.cc\ + init/gelib.cc \ + session/inner_session.cc \ + session/session_manager.cc \ + engine_manager/dnnengine_manager.cc \ + opskernel_manager/ops_kernel_manager.cc \ + graph/manager/graph_manager.cc \ + graph/manager/graph_manager_utils.cc \ + graph/manager/graph_context.cc \ + graph/preprocess/graph_preprocess.cc \ + graph/preprocess/multi_batch_copy_graph.cc \ + graph/execute/graph_execute.cc \ + graph/load/graph_loader.cc \ + graph/optimize/graph_optimize.cc \ + graph/optimize/summary_optimize.cc \ + graph/build/graph_builder.cc \ + graph/partition/engine_place.cc \ + 
graph/partition/graph_partition.cc \ + graph/partition/dynamic_shape_partition.cc \ + generator/ge_generator.cc \ + generator/generator_api.cc \ + graph/manager/graph_var_manager.cc \ + graph/manager/graph_mem_allocator.cc \ + graph/manager/graph_caching_allocator.cc \ + +BUILER_SRC_FILES := \ + ir_build/ge_ir_build.cc \ + ir_build/atc_ir_common.cc \ + +OMG_HOST_SRC_FILES := \ + model/ge_model.cc \ + model/ge_root_model.cc \ + graph/common/transop_util.cc \ + graph/passes/pass_manager.cc \ + graph/passes/resource_pair_add_control_pass.cc \ + graph/passes/resource_pair_remove_control_pass.cc \ + graph/passes/pass_utils.cc \ + graph/passes/base_pass.cc \ + graph/passes/constant_folding_pass.cc \ + graph/passes/aicpu_constant_folding_pass.cc \ + graph/passes/reshape_remove_pass.cc \ + graph/passes/reshape_recovery_pass.cc \ + graph/passes/transop_breadth_fusion_pass.cc \ + graph/passes/transop_depth_fusion_pass.cc \ + graph/passes/transop_nearby_allreduce_fusion_pass.cc \ + graph/passes/same_transdata_breadth_fusion_pass.cc \ + graph/passes/transop_without_reshape_fusion_pass.cc \ + graph/passes/compile_nodes_pass.cc \ + graph/passes/variable_prepare_op_pass.cc \ + graph/passes/variable_ref_delete_op_pass.cc \ + graph/passes/variable_ref_useless_control_out_delete_pass.cc \ + graph/passes/subgraph_pass.cc \ + graph/passes/data_pass.cc \ + graph/passes/net_output_pass.cc \ + graph/passes/replace_transshape_pass.cc \ + graph/passes/constant_fuse_same_pass.cc \ + graph/passes/print_op_pass.cc \ + graph/passes/no_use_reshape_remove_pass.cc \ + graph/passes/iterator_op_pass.cc \ + graph/passes/atomic_addr_clean_pass.cc \ + graph/common/omg_util.cc \ + graph/common/bcast.cc \ + graph/passes/dimension_compute_pass.cc \ + graph/passes/dimension_adjust_pass.cc \ + graph/passes/get_original_format_pass.cc \ + graph/passes/shape_operate_op_remove_pass.cc \ + graph/passes/unused_op_remove_pass.cc \ + graph/passes/assert_pass.cc \ + graph/passes/dropout_pass.cc \ + 
graph/passes/infershape_pass.cc \ + graph/passes/unused_const_pass.cc \ + graph/passes/isolated_op_remove_pass.cc \ + graph/passes/permute_pass.cc \ + graph/passes/ctrl_edge_transfer_pass.cc \ + host_kernels/broadcast_gradient_args_kernel.cc \ + host_kernels/greater_kernel.cc \ + host_kernels/gather_v2_kernel.cc \ + host_kernels/maximum_kernel.cc \ + host_kernels/floormod_kernel.cc \ + host_kernels/floordiv_kernel.cc \ + host_kernels/range_kernel.cc \ + host_kernels/shape_kernel.cc \ + host_kernels/size_kernel.cc \ + host_kernels/shape_n_kernel.cc \ + host_kernels/rank_kernel.cc \ + host_kernels/broadcast_args_kernel.cc \ + host_kernels/fill_kernel.cc \ + host_kernels/empty_kernel.cc \ + host_kernels/expanddims_kernel.cc \ + host_kernels/reshape_kernel.cc \ + host_kernels/squeeze_kernel.cc \ + host_kernels/unsqueeze_kernel.cc \ + host_kernels/kernel_utils.cc \ + host_kernels/cast_kernel.cc \ + host_kernels/transdata_kernel.cc \ + host_kernels/unpack_kernel.cc \ + host_kernels/transpose_kernel.cc \ + host_kernels/permute_kernel.cc \ + host_kernels/pack_kernel.cc \ + host_kernels/concat_v2_kernel.cc \ + host_kernels/concat_offset_kernel.cc \ + host_kernels/strided_slice_kernel.cc \ + host_kernels/ssd_prior_box_kernel.cc \ + host_kernels/add_kernel.cc \ + host_kernels/sub_kernel.cc \ + host_kernels/mul_kernel.cc \ + host_kernels/reduce_prod_kernel.cc \ + host_kernels/rsqrt_kernel.cc \ + host_kernels/slice_kernel.cc \ + host_kernels/slice_d_kernel.cc \ + host_kernels/dynamic_stitch_kernel.cc \ + graph/passes/stop_gradient_pass.cc \ + graph/passes/prevent_gradient_pass.cc \ + graph/passes/identity_pass.cc \ + graph/passes/placeholder_with_default_pass.cc \ + graph/passes/snapshot_pass.cc \ + graph/passes/guarantee_const_pass.cc \ + graph/passes/var_is_initialized_op_pass.cc \ + graph/passes/parallel_concat_start_op_pass.cc \ + graph/passes/folding_pass.cc \ + graph/passes/cast_translate_pass.cc \ + graph/passes/prune_pass.cc \ + graph/passes/switch_op_pass.cc \ + 
graph/passes/multi_batch_pass.cc \ + graph/passes/next_iteration_pass.cc \ + graph/passes/control_trigger_pass.cc \ + graph/passes/cond_pass.cc \ + graph/passes/cond_remove_pass.cc \ + graph/passes/for_pass.cc \ + graph/passes/enter_pass.cc \ + graph/passes/addn_pass.cc \ + graph/passes/common_subexpression_elimination_pass.cc \ + graph/passes/transop_symmetry_elimination_pass.cc \ + graph/passes/save_pass.cc \ + graph/passes/switch_dead_branch_elimination.cc \ + graph/passes/switch_logic_remove_pass.cc \ + graph/passes/switch_data_edges_bypass.cc \ + graph/passes/merge_pass.cc \ + graph/passes/variable_format_pass.cc \ + graph/passes/variable_op_pass.cc \ + graph/passes/cast_remove_pass.cc \ + graph/passes/transpose_transdata_pass.cc \ + graph/passes/identify_reference_pass.cc \ + graph/passes/hccl_memcpy_pass.cc \ + graph/passes/flow_ctrl_pass.cc \ + graph/passes/link_gen_mask_nodes_pass.cc \ + graph/passes/replace_with_empty_const_pass.cc \ + graph/passes/hccl_group_pass.cc \ + graph/passes/switch_fusion_pass.cc \ + graph/passes/switch_split_pass.cc \ + +OMG_DEVICE_SRC_FILES := $(OMG_HOST_SRC_FILES) + + +OME_HOST_SRC_FILES := \ + graph/manager/model_manager/event_manager.cc \ + graph/manager/util/rt_context_util.cc \ + graph/manager/util/variable_accelerate_ctrl.cc \ + graph/manager/util/debug.cc \ + graph/load/new_model_manager/model_manager.cc \ + graph/load/new_model_manager/data_inputer.cc \ + graph/load/new_model_manager/davinci_model.cc \ + graph/load/new_model_manager/davinci_model_parser.cc \ + graph/load/new_model_manager/model_utils.cc \ + graph/load/new_model_manager/aipp_utils.cc \ + graph/load/new_model_manager/tbe_handle_store.cc \ + graph/load/new_model_manager/cpu_queue_schedule.cc \ + graph/load/new_model_manager/zero_copy_task.cc \ + graph/load/output/output.cc \ + graph/load/new_model_manager/data_dumper.cc \ + graph/load/new_model_manager/task_info/task_info.cc \ + graph/load/new_model_manager/task_info/event_record_task_info.cc \ + 
graph/load/new_model_manager/task_info/event_wait_task_info.cc \ + graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ + graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/new_model_manager/task_info/kernel_task_info.cc \ + graph/load/new_model_manager/task_info/label_set_task_info.cc \ + graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/new_model_manager/task_info/stream_active_task_info.cc \ + graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ + graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/new_model_manager/task_info/end_graph_task_info.cc \ + graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + single_op/task/op_task.cc \ + single_op/task/build_task_utils.cc \ + single_op/task/tbe_task_builder.cc \ + single_op/task/aicpu_task_builder.cc \ + single_op/single_op.cc \ + single_op/single_op_model.cc \ + single_op/stream_resource.cc \ + single_op/single_op_manager.cc \ + hybrid/hybrid_davinci_model_stub.cc \ + # graph/load/new_model_manager/task_info/hccl_task_info.cc + +OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES) + +COMMON_LOCAL_C_INCLUDES := \ + proto/om.proto \ + proto/task.proto \ + proto/insert_op.proto \ + proto/ge_ir.proto \ + proto/fwk_adapter.proto \ + proto/op_mapping_info.proto \ + proto/tensorflow/attr_value.proto \ + proto/tensorflow/function.proto \ + proto/tensorflow/graph.proto \ + proto/tensorflow/node_def.proto \ + proto/tensorflow/op_def.proto \ + 
proto/tensorflow/resource_handle.proto \ + proto/tensorflow/tensor.proto \ + proto/tensorflow/tensor_shape.proto \ + proto/tensorflow/types.proto \ + proto/tensorflow/versions.proto \ + $(LOCAL_PATH) ./ \ + $(TOPDIR)inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/framework/common \ + $(TOPDIR)inc/common \ + $(TOPDIR)inc/runtime \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)ops/built-in/op_proto/inc \ + third_party/json/include \ + third_party/protobuf/include \ + third_party/opencv/include \ + +NEW_OMG_HOST_SRC_FILES := \ + graph/preprocess/insert_op/util_insert_aipp_op.cc \ + graph/preprocess/insert_op/ge_aipp_op.cc \ + graph/build/model_builder.cc \ + graph/build/task_generator.cc \ + graph/build/stream_allocator.cc \ + graph/build/logical_stream_allocator.cc \ + graph/build/stream_graph_optimizer.cc \ + graph/build/run_context.cc \ + graph/build/label_allocator.cc \ + graph/label/label_maker.cc \ + graph/label/if_label_maker.cc \ + graph/label/case_label_maker.cc \ + graph/label/while_label_maker.cc \ + graph/label/partitioned_call_label_maker.cc \ + +OME_HOST_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES) +OMG_DEVICE_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES) + +DEVICE_LOCAL_C_INCLUDES := \ + proto/om.proto \ + proto/task.proto \ + proto/insert_op.proto \ + proto/ge_ir.proto \ + proto/fwk_adapter.proto \ + proto/op_mapping_info.proto \ + proto/tensorflow/attr_value.proto \ + proto/tensorflow/function.proto \ + proto/tensorflow/graph.proto \ + proto/tensorflow/node_def.proto \ + proto/tensorflow/op_def.proto \ + proto/tensorflow/resource_handle.proto \ + proto/tensorflow/tensor.proto \ + proto/tensorflow/tensor_shape.proto \ + proto/tensorflow/types.proto \ + proto/tensorflow/versions.proto \ + $(LOCAL_PATH) ./ \ + $(TOPDIR)inc \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/common/util \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/framework/common \ + 
$(TOPDIR)inc/runtime \ + $(TOPDIR)ops/built-in/op_proto/inc \ + $(TOPDIR)framework/domi \ + third_party/json/include \ + third_party/protobuf/include \ + third_party/opencv/include \ + +#compiler for host infer +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_compiler + +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 +# from ome_inference.mk +LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP +ifeq ($(DEBUG), 1) +LOCAL_CFLAGS += -g -O0 +endif + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) +LOCAL_SRC_FILES += $(GRAPH_MANAGER_LOCAL_SRC_FILES) +LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES) +LOCAL_SRC_FILES += $(OME_HOST_SRC_FILES) +LOCAL_SRC_FILES += $(NEW_OME_DEVICE_SRC_FILES) +LOCAL_SRC_FILES += $(BUILER_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := libge_memory \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + libmmpa \ + libgraph \ + libregister \ + libge_common \ + libruntime_compile \ + libresource \ + liberror_manager \ + +LOCAL_LDFLAGS := -lrt -ldl + + +include $(BUILD_HOST_SHARED_LIBRARY) + +#compiler for device +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_compiler +LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_FILE +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 +LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP +LOCAL_CFLAGS += -DOMG_DEVICE_VERSION +LOCAL_CFLAGS += -O2 +LOCAL_MODULE_CLASS := SHARED_LIBRARIES + + +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) +LOCAL_SRC_FILES += $(GRAPH_MANAGER_LOCAL_SRC_FILES) +LOCAL_SRC_FILES += $(OMG_DEVICE_SRC_FILES) +LOCAL_SRC_FILES += $(OME_DEVICE_SRC_FILES) +LOCAL_SRC_FILES += $(BUILER_SRC_FILES) + + +LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) + +LOCAL_STATIC_LIBRARIES := libge_memory \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + libmmpa \ + libgraph \ + libregister \ + libresource \ + libruntime_compile \ + libge_common \ + + + + +ifeq 
($(device_os),android) +LOCAL_LDFLAGS := -ldl +else +LOCAL_LDFLAGS := -lrt -ldl +endif + +LOCAL_CFLAGS += \ + -Wall + +ifeq ($(device_os),android) +LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog +endif +include $(BUILD_SHARED_LIBRARY) diff --git a/src/ge/ge_local_engine/engine/host_cpu_engine.cc b/src/ge/ge_local_engine/engine/host_cpu_engine.cc index 9ee616ac..86f58b23 100644 --- a/src/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/src/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -124,7 +124,7 @@ Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector &named_inputs, map &named_outputs) { - GELOGD("To run host cpu op: %s", op_desc->GetName().c_str()); + GELOGD("Run operation on host cpu, op name: %s", op_desc->GetName().c_str()); Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc); auto ret = op_kernel.Compute(op, named_inputs, named_outputs); if (ret != GRAPH_SUCCESS) { @@ -139,7 +139,7 @@ Status HostCpuEngine::Run(NodePtr &node, const vector &inputs, GE_CHECK_NOTNULL(node); GE_CHECK_NOTNULL(node->GetOpDesc()); - GELOGD("To run node by host cpu engine. node name = %s", node->GetName().c_str()); + GELOGD("Run node by host cpu engine. node name = %s", node->GetName().c_str()); std::unique_ptr op_kernel; GE_CHK_STATUS_RET_NOLOG(FindOpKernel(node, op_kernel)); @@ -151,7 +151,7 @@ Status HostCpuEngine::Run(NodePtr &node, const vector &inputs, GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, tmp_outputs, named_outputs)); GE_CHK_STATUS_RET_NOLOG(RunInternal(op_desc, *op_kernel, named_inputs, named_outputs)); - GELOGD("Ran node by host cpu engine successfully. name node = %s", node->GetName().c_str()); + GELOGD("Run node by host cpu engine successfully. 
name node = %s", node->GetName().c_str()); outputs.swap(tmp_outputs); return SUCCESS; } diff --git a/src/ge/ge_local_engine/module.mk b/src/ge/ge_local_engine/module.mk new file mode 100644 index 00000000..ee6b15c1 --- /dev/null +++ b/src/ge/ge_local_engine/module.mk @@ -0,0 +1,59 @@ +LOCAL_PATH := $(call my-dir) + + +local_lib_src_files := engine/ge_local_engine.cc \ + ops_kernel_store/ge_local_ops_kernel_info.cc \ + ops_kernel_store/op/op_factory.cc \ + ops_kernel_store/op/op.cc \ + ops_kernel_store/op/ge_deleted_op.cc \ + ops_kernel_store/op/no_op.cc \ + +local_lib_inc_path := proto/task.proto \ + ${LOCAL_PATH} \ + ${TOPDIR}inc \ + ${TOPDIR}inc/external \ + ${TOPDIR}inc/external/graph \ + $(TOPDIR)libc_sec/include \ + ${TOPDIR}third_party/protobuf/include \ + ${TOPDIR}inc/framework \ + $(TOPDIR)framework/domi \ + +#compiler for host +include $(CLEAR_VARS) +LOCAL_MODULE := libge_local_engine +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libregister \ + libruntime + +LOCAL_SRC_FILES := $(local_lib_src_files) +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_SHARED_LIBRARY} + +#compiler for atc +include $(CLEAR_VARS) +LOCAL_MODULE := atclib/libge_local_engine +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libregister \ + libruntime_compile + +LOCAL_SRC_FILES := $(local_lib_src_files) +LOCAL_C_INCLUDES := $(local_lib_inc_path) + +include ${BUILD_HOST_SHARED_LIBRARY} diff --git a/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc b/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc index 857cee6b..adf936c0 100644 --- a/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc +++ 
b/src/ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_info.cc @@ -81,7 +81,7 @@ Status GeLocalOpsKernelInfoStore::CalcOpRunningParam(Node &ge_node) { const string node_name = ge_node.GetName(); const string node_type = ge_node.GetType(); size_t output_size = op_desc->GetOutputsSize(); - GELOGD("Calc op[%s:%s] op running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size); + GELOGD("Calc op[%s:%s] running param, output size=%zu.", node_name.c_str(), node_type.c_str(), output_size); for (size_t i = 0; i < output_size; ++i) { GeTensorDesc output_tensor = op_desc->GetOutputDesc(static_cast(i)); diff --git a/src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc b/src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc index d595be8d..62fe1b5d 100644 --- a/src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc +++ b/src/ge/ge_local_engine/ops_kernel_store/op/no_op.cc @@ -24,7 +24,7 @@ namespace ge_local { NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} Status NoOp::Run() { - GELOGI("Node:%s type is %s, no need gen task.", name_.c_str(), type_.c_str()); + GELOGI("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str()); // Do nothing return SUCCESS; } diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk new file mode 100644 index 00000000..2d4bcf6a --- /dev/null +++ b/src/ge/ge_runner.mk @@ -0,0 +1,429 @@ +LOCAL_PATH := $(call my-dir) + +LIBGE_LOCAL_SRC_FILES := \ + proto/fusion_model.proto \ + proto/optimizer_priority.proto \ + common/formats/format_transfers/datatype_transfer.cc \ + common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \ + common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \ + common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \ + common/formats/format_transfers/format_transfer_fractal_nz.cc \ + common/formats/format_transfers/format_transfer_fractal_z.cc \ + common/formats/format_transfers/format_transfer_fractal_zz.cc \ + 
common/formats/format_transfers/format_transfer_fracz_hwcn.cc \ + common/formats/format_transfers/format_transfer_fracz_nchw.cc \ + common/formats/format_transfers/format_transfer_fracz_nhwc.cc \ + common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \ + common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \ + common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \ + common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ + common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \ + common/formats/format_transfers/format_transfer_transpose.cc \ + common/formats/formats.cc \ + common/formats/utils/formats_trans_utils.cc \ + common/fp16_t.cc \ + common/ge/plugin_manager.cc\ + common/helper/model_cache_helper.cc \ + common/profiling/profiling_manager.cc \ + engine_manager/dnnengine_manager.cc \ + ge_local_engine/engine/host_cpu_engine.cc \ + generator/ge_generator.cc \ + generator/generator_api.cc \ + graph/build/graph_builder.cc \ + graph/build/label_allocator.cc \ + graph/build/logical_stream_allocator.cc \ + graph/build/model_builder.cc \ + graph/build/run_context.cc \ + graph/build/stream_allocator.cc \ + graph/build/stream_graph_optimizer.cc \ + graph/build/task_generator.cc \ + graph/common/bcast.cc \ + graph/common/omg_util.cc \ + graph/common/transop_util.cc \ + graph/execute/graph_execute.cc \ + graph/label/case_label_maker.cc \ + graph/label/if_label_maker.cc \ + graph/label/label_maker.cc \ + graph/label/partitioned_call_label_maker.cc \ + graph/label/while_label_maker.cc \ + graph/load/graph_loader.cc \ + graph/load/new_model_manager/cpu_queue_schedule.cc \ + graph/load/new_model_manager/data_dumper.cc \ + graph/load/new_model_manager/data_inputer.cc \ + graph/load/new_model_manager/davinci_model.cc \ + graph/load/new_model_manager/davinci_model_parser.cc \ + graph/load/new_model_manager/model_manager.cc \ + graph/load/new_model_manager/model_utils.cc \ + graph/load/new_model_manager/aipp_utils.cc \ + 
graph/load/new_model_manager/task_info/end_graph_task_info.cc \ + graph/load/new_model_manager/task_info/event_record_task_info.cc \ + graph/load/new_model_manager/task_info/event_wait_task_info.cc \ + graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ + graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/new_model_manager/task_info/hccl_task_info.cc \ + graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/new_model_manager/task_info/kernel_task_info.cc \ + graph/load/new_model_manager/task_info/label_set_task_info.cc \ + graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/new_model_manager/task_info/stream_active_task_info.cc \ + graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ + graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/new_model_manager/task_info/task_info.cc \ + graph/load/new_model_manager/tbe_handle_store.cc \ + graph/load/new_model_manager/zero_copy_task.cc \ + graph/load/output/output.cc \ + graph/manager/graph_context.cc \ + graph/manager/graph_manager.cc \ + graph/manager/graph_manager_utils.cc \ + graph/manager/graph_mem_allocator.cc \ + graph/manager/graph_caching_allocator.cc \ + graph/manager/graph_var_manager.cc \ + graph/manager/model_manager/event_manager.cc \ + graph/manager/trans_var_data_utils.cc \ + graph/manager/util/debug.cc \ + graph/manager/util/hcom_util.cc \ + graph/manager/util/rt_context_util.cc \ + graph/manager/util/variable_accelerate_ctrl.cc \ + 
graph/optimize/graph_optimize.cc \ + graph/optimize/optimizer/allreduce_fusion_pass.cc \ + graph/optimize/summary_optimize.cc \ + graph/partition/engine_place.cc \ + graph/partition/graph_partition.cc \ + graph/passes/addn_pass.cc \ + graph/passes/aicpu_constant_folding_pass.cc \ + graph/passes/assert_pass.cc \ + graph/passes/atomic_addr_clean_pass.cc \ + graph/partition/dynamic_shape_partition.cc \ + graph/passes/base_pass.cc \ + graph/passes/cast_remove_pass.cc \ + graph/passes/cast_translate_pass.cc \ + graph/passes/common_subexpression_elimination_pass.cc \ + graph/passes/transop_symmetry_elimination_pass.cc \ + graph/passes/compile_nodes_pass.cc \ + graph/passes/constant_folding_pass.cc \ + graph/passes/constant_fuse_same_pass.cc \ + graph/passes/control_trigger_pass.cc \ + graph/passes/dimension_adjust_pass.cc \ + graph/passes/dimension_compute_pass.cc \ + graph/passes/dropout_pass.cc \ + graph/passes/hccl_group_pass.cc \ + graph/passes/switch_fusion_pass.cc \ + graph/passes/switch_split_pass.cc \ + graph/passes/enter_pass.cc \ + graph/passes/flow_ctrl_pass.cc \ + host_kernels/transpose_kernel.cc \ + host_kernels/add_kernel.cc \ + host_kernels/broadcast_args_kernel.cc \ + host_kernels/broadcast_gradient_args_kernel.cc \ + host_kernels/cast_kernel.cc \ + host_kernels/concat_offset_kernel.cc \ + host_kernels/concat_v2_kernel.cc \ + host_kernels/dynamic_stitch_kernel.cc \ + host_kernels/empty_kernel.cc \ + host_kernels/expanddims_kernel.cc \ + host_kernels/fill_kernel.cc \ + host_kernels/floordiv_kernel.cc \ + host_kernels/floormod_kernel.cc \ + host_kernels/gather_v2_kernel.cc \ + host_kernels/greater_kernel.cc \ + host_kernels/kernel_utils.cc \ + host_kernels/maximum_kernel.cc \ + host_kernels/mul_kernel.cc \ + host_kernels/pack_kernel.cc \ + host_kernels/permute_kernel.cc \ + host_kernels/range_kernel.cc \ + host_kernels/rank_kernel.cc \ + host_kernels/reduce_prod_kernel.cc \ + host_kernels/reshape_kernel.cc \ + host_kernels/rsqrt_kernel.cc \ + 
host_kernels/shape_kernel.cc \ + host_kernels/shape_n_kernel.cc \ + host_kernels/size_kernel.cc \ + host_kernels/slice_d_kernel.cc \ + host_kernels/slice_kernel.cc \ + host_kernels/squeeze_kernel.cc \ + host_kernels/unsqueeze_kernel.cc \ + host_kernels/ssd_prior_box_kernel.cc \ + host_kernels/strided_slice_kernel.cc \ + host_kernels/sub_kernel.cc \ + host_kernels/transdata_kernel.cc \ + host_kernels/unpack_kernel.cc \ + graph/passes/folding_pass.cc \ + graph/passes/get_original_format_pass.cc \ + graph/passes/guarantee_const_pass.cc \ + graph/passes/hccl_memcpy_pass.cc \ + graph/passes/identify_reference_pass.cc \ + graph/passes/identity_pass.cc \ + graph/passes/infershape_pass.cc \ + graph/passes/isolated_op_remove_pass.cc \ + graph/passes/iterator_op_pass.cc \ + graph/passes/link_gen_mask_nodes_pass.cc \ + graph/passes/merge_pass.cc \ + graph/passes/multi_batch_pass.cc \ + graph/passes/net_output_pass.cc \ + graph/passes/next_iteration_pass.cc \ + graph/passes/no_use_reshape_remove_pass.cc \ + graph/passes/pass_manager.cc \ + graph/passes/pass_utils.cc \ + graph/passes/permute_pass.cc \ + graph/passes/placeholder_with_default_pass.cc \ + graph/passes/prevent_gradient_pass.cc \ + graph/passes/print_op_pass.cc \ + graph/passes/prune_pass.cc \ + graph/passes/ctrl_edge_transfer_pass.cc \ + graph/passes/replace_with_empty_const_pass.cc \ + graph/passes/reshape_remove_pass.cc \ + graph/passes/reshape_recovery_pass.cc \ + graph/passes/resource_pair_add_control_pass.cc \ + graph/passes/resource_pair_remove_control_pass.cc \ + graph/passes/same_transdata_breadth_fusion_pass.cc \ + graph/passes/save_pass.cc \ + graph/passes/shape_operate_op_remove_pass.cc \ + graph/passes/snapshot_pass.cc \ + graph/passes/stop_gradient_pass.cc \ + graph/passes/subgraph_pass.cc \ + graph/passes/data_pass.cc \ + graph/passes/switch_data_edges_bypass.cc \ + graph/passes/switch_logic_remove_pass.cc \ + graph/passes/switch_op_pass.cc \ + graph/passes/switch_dead_branch_elimination.cc \ + 
graph/passes/replace_transshape_pass.cc \ + graph/passes/transop_breadth_fusion_pass.cc \ + graph/passes/transop_depth_fusion_pass.cc \ + graph/passes/transop_nearby_allreduce_fusion_pass.cc \ + graph/passes/transop_without_reshape_fusion_pass.cc \ + graph/passes/transpose_transdata_pass.cc \ + graph/passes/unused_const_pass.cc \ + graph/passes/unused_op_remove_pass.cc \ + graph/passes/var_is_initialized_op_pass.cc \ + graph/passes/parallel_concat_start_op_pass.cc \ + graph/passes/cond_pass.cc \ + graph/passes/cond_remove_pass.cc \ + graph/passes/for_pass.cc \ + graph/passes/variable_format_pass.cc \ + graph/passes/variable_op_pass.cc \ + graph/passes/variable_prepare_op_pass.cc \ + graph/passes/variable_ref_delete_op_pass.cc \ + graph/passes/variable_ref_useless_control_out_delete_pass.cc \ + graph/preprocess/graph_preprocess.cc \ + graph/preprocess/insert_op/ge_aipp_op.cc \ + graph/preprocess/insert_op/util_insert_aipp_op.cc \ + graph/preprocess/multi_batch_copy_graph.cc \ + init/gelib.cc \ + model/ge_model.cc \ + model/ge_root_model.cc \ + omm/csa_interact.cc \ + opskernel_manager/ops_kernel_manager.cc \ + session/inner_session.cc \ + session/session_manager.cc \ + single_op/single_op.cc \ + single_op/single_op_manager.cc \ + single_op/single_op_model.cc \ + single_op/stream_resource.cc \ + single_op/task/build_task_utils.cc \ + single_op/task/op_task.cc \ + single_op/task/tbe_task_builder.cc \ + single_op/task/aicpu_task_builder.cc \ + hybrid/common/tensor_value.cc \ + hybrid/common/npu_memory_allocator.cc \ + hybrid/executor/rt_callback_manager.cc \ + hybrid/executor/node_state.cc \ + hybrid/executor/node_done_manager.cc \ + hybrid/executor/hybrid_profiler.cc \ + hybrid/executor/hybrid_model_executor.cc \ + hybrid/executor/hybrid_model_async_executor.cc \ + hybrid/executor/hybrid_execution_context.cc \ + hybrid/executor/worker/task_compile_engine.cc \ + hybrid/executor/worker/shape_inference_engine.cc \ + hybrid/executor/worker/execution_engine.cc \ + 
hybrid/model/hybrid_model.cc \ + hybrid/model/hybrid_model_builder.cc \ + hybrid/model/node_item.cc \ + hybrid/node_executor/aicore/aicore_node_executor.cc \ + hybrid/node_executor/aicore/aicore_op_task.cc \ + hybrid/node_executor/aicore/aicore_task_builder.cc \ + hybrid/node_executor/aicore/aicore_task_compiler.cc \ + hybrid/node_executor/aicpu/aicpu_ext_info.cc \ + hybrid/node_executor/aicpu/aicpu_node_executor.cc \ + hybrid/node_executor/compiledsubgraph/known_node_executor.cc \ + hybrid/node_executor/hostcpu/ge_local_node_executor.cc \ + hybrid/node_executor/node_executor.cc \ + hybrid/node_executor/task_context.cc \ + hybrid/hybrid_davinci_model.cc \ + executor/ge_executor.cc \ + +LIBCLIENT_LOCAL_SRC_FILES := \ + proto/ge_api.proto \ + client/ge_api.cc \ + +RUNNER_LOCAL_C_INCLUDES := \ + $(LOCAL_PATH) ./ \ + $(LOCAL_PATH)/../ \ + $(LOCAL_PATH)/../../ \ + $(TOPDIR)inc \ + $(TOPDIR)inc/common \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/framework/common \ + $(TOPDIR)inc/graph \ + $(TOPDIR)inc/runtime \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)ops/built-in/op_proto/inc \ + proto/fwk_adapter.proto \ + proto/ge_ir.proto \ + proto/insert_op.proto \ + proto/om.proto \ + proto/op_mapping_info.proto \ + proto/task.proto \ + proto/tensorflow/attr_value.proto \ + proto/tensorflow/function.proto \ + proto/tensorflow/graph.proto \ + proto/tensorflow/node_def.proto \ + proto/tensorflow/op_def.proto \ + proto/tensorflow/resource_handle.proto \ + proto/tensorflow/tensor.proto \ + proto/tensorflow/tensor_shape.proto \ + proto/tensorflow/types.proto \ + proto/tensorflow/versions.proto \ + third_party/json/include \ + third_party/opencv/include \ + third_party/protobuf/include \ + + + +#compiler for GeRunner +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_runner + +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 +LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD +ifeq 
($(DEBUG), 1) +LOCAL_CFLAGS += -g -O0 +endif + + +LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) +LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := libge_memory \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + libmmpa \ + libgraph \ + libregister \ + libge_common \ + libhccl \ + libmsprof \ + liberror_manager \ + + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_SHARED_LIBRARIES += \ + libruntime \ + libresource \ + +include $(BUILD_HOST_SHARED_LIBRARY) + + +# add engine_conf.json to host +include $(CLEAR_VARS) + +LOCAL_MODULE := engine_conf.json + +LOCAL_SRC_FILES := engine_manager/engine_conf.json + +LOCAL_MODULE_CLASS := ETC + +LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json +include $(BUILD_HOST_PREBUILT) + +# add optimizer_priority.pbtxt to host +include $(CLEAR_VARS) + +LOCAL_MODULE := optimizer_priority.pbtxt + +LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt + +LOCAL_MODULE_CLASS := ETC + +LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt +include $(BUILD_HOST_PREBUILT) + +#compiler for GeRunner static lib +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_runner + +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 +LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD + +LOCAL_CFLAGS += -g -O0 + + +LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) +LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := libge_memory \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libslog \ + libmmpa \ + libhccl \ + libmsprof \ + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_SHARED_LIBRARIES += \ + libruntime \ + libresource \ + +include $(BUILD_HOST_STATIC_LIBRARY) + +#compiler for GeRunner static lib device +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_runner + +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 
+LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD + +LOCAL_CFLAGS += -g -O0 + +LOCAL_C_INCLUDES := $(RUNNER_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(LIBGE_LOCAL_SRC_FILES) +LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := libge_memory \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libslog \ + libmmpa \ + libhccl \ + libmsprof \ + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_SHARED_LIBRARIES += \ + libruntime \ + libresource \ + +include $(BUILD_STATIC_LIBRARY) diff --git a/src/ge/ge_train.mk b/src/ge/ge_train.mk new file mode 100644 index 00000000..767ce86b --- /dev/null +++ b/src/ge/ge_train.mk @@ -0,0 +1,333 @@ +LOCAL_PATH := $(call my-dir) + +COMMON_LOCAL_SRC_FILES := \ + proto/fusion_model.proto \ + proto/optimizer_priority.proto \ + session/inner_session.cc \ + session/session_manager.cc \ + common/ge/plugin_manager.cc\ + common/fp16_t.cc \ + common/formats/utils/formats_trans_utils.cc \ + common/formats/format_transfers/datatype_transfer.cc \ + common/formats/format_transfers/format_transfer_transpose.cc \ + common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc \ + common/formats/format_transfers/format_transfer_fractal_z.cc \ + common/formats/format_transfers/format_transfer_fractal_nz.cc \ + common/formats/format_transfers/format_transfer_fractal_zz.cc \ + common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc \ + common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc \ + common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc \ + common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc \ + common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc \ + common/formats/format_transfers/format_transfer_fracz_nchw.cc \ + common/formats/format_transfers/format_transfer_fracz_nhwc.cc \ + common/formats/format_transfers/format_transfer_fracz_hwcn.cc \ + common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc \ + 
common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc \ + common/formats/formats.cc \ + init/gelib.cc \ + engine_manager/dnnengine_manager.cc \ + opskernel_manager/ops_kernel_manager.cc \ + graph/manager/graph_manager.cc \ + graph/manager/graph_manager_utils.cc \ + graph/manager/graph_context.cc \ + graph/preprocess/graph_preprocess.cc \ + graph/preprocess/multi_batch_copy_graph.cc \ + graph/execute/graph_execute.cc \ + graph/load/graph_loader.cc \ + graph/optimize/graph_optimize.cc \ + graph/passes/folding_pass.cc \ + graph/optimize/summary_optimize.cc \ + graph/build/graph_builder.cc \ + graph/partition/engine_place.cc \ + graph/partition/graph_partition.cc \ + graph/partition/dynamic_shape_partition.cc \ + generator/ge_generator.cc \ + generator/generator_api.cc \ + common/profiling/profiling_manager.cc \ + ge_local_engine/engine/host_cpu_engine.cc \ + common/helper/model_cache_helper.cc \ + +OMG_HOST_SRC_FILES := \ + model/ge_model.cc \ + model/ge_root_model.cc \ + graph/common/transop_util.cc \ + graph/manager/graph_var_manager.cc \ + graph/manager/trans_var_data_utils.cc \ + omm/csa_interact.cc \ + graph/passes/pass_manager.cc \ + graph/passes/pass_utils.cc \ + graph/passes/base_pass.cc \ + graph/passes/resource_pair_add_control_pass.cc \ + graph/passes/resource_pair_remove_control_pass.cc \ + graph/passes/constant_folding_pass.cc \ + graph/passes/aicpu_constant_folding_pass.cc \ + graph/passes/reshape_remove_pass.cc \ + graph/passes/reshape_recovery_pass.cc \ + graph/passes/transop_breadth_fusion_pass.cc \ + graph/passes/transop_depth_fusion_pass.cc \ + graph/passes/same_transdata_breadth_fusion_pass.cc \ + graph/passes/transop_without_reshape_fusion_pass.cc \ + graph/passes/compile_nodes_pass.cc \ + graph/passes/transop_nearby_allreduce_fusion_pass.cc \ + graph/passes/variable_prepare_op_pass.cc \ + graph/passes/variable_ref_delete_op_pass.cc \ + graph/passes/variable_ref_useless_control_out_delete_pass.cc \ + 
graph/passes/variable_op_pass.cc \ + graph/passes/cast_remove_pass.cc \ + graph/passes/replace_transshape_pass.cc \ + graph/passes/transpose_transdata_pass.cc \ + graph/passes/identify_reference_pass.cc \ + graph/passes/variable_format_pass.cc \ + graph/passes/subgraph_pass.cc \ + graph/passes/data_pass.cc \ + graph/passes/net_output_pass.cc \ + graph/passes/constant_fuse_same_pass.cc \ + graph/passes/print_op_pass.cc \ + graph/passes/no_use_reshape_remove_pass.cc \ + graph/passes/iterator_op_pass.cc \ + graph/passes/atomic_addr_clean_pass.cc \ + graph/optimize/optimizer/allreduce_fusion_pass.cc \ + graph/common/omg_util.cc \ + graph/common/bcast.cc \ + graph/passes/dimension_compute_pass.cc \ + graph/passes/dimension_adjust_pass.cc \ + graph/passes/get_original_format_pass.cc \ + graph/passes/shape_operate_op_remove_pass.cc \ + graph/passes/unused_op_remove_pass.cc \ + graph/passes/assert_pass.cc \ + graph/passes/dropout_pass.cc \ + graph/passes/infershape_pass.cc \ + graph/passes/unused_const_pass.cc \ + graph/passes/isolated_op_remove_pass.cc \ + graph/passes/permute_pass.cc \ + graph/passes/ctrl_edge_transfer_pass.cc \ + host_kernels/broadcast_gradient_args_kernel.cc \ + host_kernels/greater_kernel.cc \ + host_kernels/gather_v2_kernel.cc \ + host_kernels/maximum_kernel.cc \ + host_kernels/floormod_kernel.cc \ + host_kernels/floordiv_kernel.cc \ + host_kernels/range_kernel.cc \ + host_kernels/shape_kernel.cc \ + host_kernels/size_kernel.cc \ + host_kernels/shape_n_kernel.cc \ + host_kernels/rank_kernel.cc \ + host_kernels/broadcast_args_kernel.cc \ + host_kernels/fill_kernel.cc \ + host_kernels/empty_kernel.cc \ + host_kernels/expanddims_kernel.cc \ + host_kernels/reshape_kernel.cc \ + host_kernels/squeeze_kernel.cc \ + host_kernels/kernel_utils.cc \ + host_kernels/cast_kernel.cc \ + host_kernels/transdata_kernel.cc \ + host_kernels/transpose_kernel.cc \ + host_kernels/permute_kernel.cc \ + host_kernels/pack_kernel.cc \ + host_kernels/concat_v2_kernel.cc \ + 
host_kernels/concat_offset_kernel.cc \ + host_kernels/strided_slice_kernel.cc \ + host_kernels/ssd_prior_box_kernel.cc \ + host_kernels/add_kernel.cc \ + host_kernels/unpack_kernel.cc \ + host_kernels/sub_kernel.cc \ + host_kernels/mul_kernel.cc \ + host_kernels/reduce_prod_kernel.cc \ + host_kernels/rsqrt_kernel.cc \ + host_kernels/slice_kernel.cc \ + host_kernels/slice_d_kernel.cc \ + host_kernels/dynamic_stitch_kernel.cc \ + graph/passes/stop_gradient_pass.cc \ + graph/passes/prevent_gradient_pass.cc \ + graph/passes/identity_pass.cc \ + graph/passes/placeholder_with_default_pass.cc \ + graph/passes/snapshot_pass.cc \ + graph/passes/guarantee_const_pass.cc \ + graph/passes/var_is_initialized_op_pass.cc \ + graph/passes/parallel_concat_start_op_pass.cc \ + graph/passes/cast_translate_pass.cc \ + graph/passes/addn_pass.cc \ + graph/passes/common_subexpression_elimination_pass.cc \ + graph/passes/transop_symmetry_elimination_pass.cc \ + graph/passes/save_pass.cc \ + graph/passes/switch_dead_branch_elimination.cc \ + graph/passes/merge_pass.cc \ + graph/passes/prune_pass.cc \ + graph/passes/flow_ctrl_pass.cc \ + graph/passes/control_trigger_pass.cc \ + graph/passes/switch_data_edges_bypass.cc \ + graph/passes/switch_op_pass.cc \ + graph/passes/multi_batch_pass.cc \ + graph/passes/switch_logic_remove_pass.cc \ + graph/passes/next_iteration_pass.cc \ + graph/passes/cond_pass.cc \ + graph/passes/cond_remove_pass.cc \ + graph/passes/for_pass.cc \ + graph/passes/enter_pass.cc \ + graph/passes/hccl_memcpy_pass.cc \ + graph/passes/link_gen_mask_nodes_pass.cc \ + graph/passes/replace_with_empty_const_pass.cc \ + graph/passes/hccl_group_pass.cc \ + +OME_SRC_FILES := \ + graph/manager/graph_mem_allocator.cc \ + graph/manager/graph_caching_allocator.cc \ + graph/manager/model_manager/event_manager.cc \ + graph/manager/util/debug.cc \ + graph/manager/util/rt_context_util.cc \ + graph/manager/util/variable_accelerate_ctrl.cc \ + graph/manager/util/hcom_util.cc \ + 
graph/load/new_model_manager/model_manager.cc \ + graph/load/new_model_manager/data_inputer.cc \ + graph/load/new_model_manager/davinci_model.cc \ + graph/load/new_model_manager/davinci_model_parser.cc \ + graph/load/new_model_manager/model_utils.cc \ + graph/load/new_model_manager/tbe_handle_store.cc \ + graph/load/new_model_manager/cpu_queue_schedule.cc \ + graph/load/new_model_manager/zero_copy_task.cc \ + graph/load/output/output.cc \ + graph/load/new_model_manager/data_dumper.cc \ + graph/load/new_model_manager/task_info/task_info.cc \ + graph/load/new_model_manager/task_info/event_record_task_info.cc \ + graph/load/new_model_manager/task_info/event_wait_task_info.cc \ + graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ + graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ + graph/load/new_model_manager/task_info/hccl_task_info.cc \ + graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ + graph/load/new_model_manager/task_info/kernel_task_info.cc \ + graph/load/new_model_manager/task_info/label_set_task_info.cc \ + graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ + graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ + graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ + graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ + graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ + graph/load/new_model_manager/task_info/stream_active_task_info.cc \ + graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ + graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ + graph/load/new_model_manager/task_info/end_graph_task_info.cc \ + graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ + graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ + single_op/task/op_task.cc \ + single_op/task/build_task_utils.cc \ + single_op/task/tbe_task_builder.cc \ + 
single_op/task/aicpu_task_builder.cc \ + single_op/single_op.cc \ + single_op/single_op_model.cc \ + single_op/stream_resource.cc \ + single_op/single_op_manager.cc \ + hybrid/hybrid_davinci_model_stub.cc \ + + +COMMON_LOCAL_C_INCLUDES := \ + proto/om.proto \ + proto/task.proto \ + proto/insert_op.proto \ + proto/ge_ir.proto \ + proto/fwk_adapter.proto \ + proto/op_mapping_info.proto \ + proto/tensorflow/attr_value.proto \ + proto/tensorflow/function.proto \ + proto/tensorflow/graph.proto \ + proto/tensorflow/node_def.proto \ + proto/tensorflow/op_def.proto \ + proto/tensorflow/resource_handle.proto \ + proto/tensorflow/tensor.proto \ + proto/tensorflow/tensor_shape.proto \ + proto/tensorflow/types.proto \ + proto/tensorflow/versions.proto \ + $(LOCAL_PATH) ./ \ + $(TOPDIR)inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/framework/common \ + $(TOPDIR)inc/runtime \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)ops/built-in/op_proto/inc \ + third_party/json/include \ + third_party/protobuf/include \ + third_party/opencv/include \ + +NEW_OMG_HOST_SRC_FILES := \ + graph/preprocess/insert_op/util_insert_aipp_op.cc \ + graph/preprocess/insert_op/ge_aipp_op.cc \ + graph/build/model_builder.cc \ + graph/build/task_generator.cc \ + graph/build/stream_allocator.cc \ + graph/build/logical_stream_allocator.cc \ + graph/build/stream_graph_optimizer.cc \ + graph/build/run_context.cc \ + graph/build/label_allocator.cc \ + graph/label/label_maker.cc \ + graph/label/if_label_maker.cc \ + graph/label/case_label_maker.cc \ + graph/label/while_label_maker.cc \ + graph/label/partitioned_call_label_maker.cc \ + + + +#compiler for host train +include $(CLEAR_VARS) + +LOCAL_MODULE := libge_train + +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 +LOCAL_CFLAGS += -DDAVINCI_CLOUD -DDAVINCI_TRAIN -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING +LOCAL_CFLAGS += -DFMK_SUPPORT_DEBUG +ifeq ($(DEBUG), 1) +LOCAL_CFLAGS += 
-g -O0 +endif + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) +LOCAL_SRC_FILES += $(OMG_HOST_SRC_FILES) +LOCAL_SRC_FILES += $(OME_SRC_FILES) +LOCAL_SRC_FILES += $(NEW_OMG_HOST_SRC_FILES) + +LOCAL_STATIC_LIBRARIES := libge_memory \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libprotobuf \ + libslog \ + libmmpa \ + libgraph \ + libregister \ + libge_common \ + libhccl \ + libmsprof \ + + +LOCAL_LDFLAGS := -lrt -ldl + +LOCAL_SHARED_LIBRARIES += \ + libruntime \ + libresource \ + +include $(BUILD_HOST_SHARED_LIBRARY) + +# add engine_conf.json to host +include $(CLEAR_VARS) + +LOCAL_MODULE := engine_conf.json + +LOCAL_SRC_FILES := engine_manager/engine_conf.json + +LOCAL_MODULE_CLASS := ETC + +LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/engine_conf.json +include $(BUILD_HOST_PREBUILT) + +# add optimizer_priority.pbtxt to host +include $(CLEAR_VARS) + +LOCAL_MODULE := optimizer_priority.pbtxt + +LOCAL_SRC_FILES := opskernel_manager/optimizer_priority.pbtxt + +LOCAL_MODULE_CLASS := ETC + +LOCAL_INSTALLED_PATH := $(HOST_OUT_ROOT)/optimizer_priority.pbtxt +include $(BUILD_HOST_PREBUILT) diff --git a/src/ge/generator/ge_generator.cc b/src/ge/generator/ge_generator.cc index f25a67cd..f0b69242 100644 --- a/src/ge/generator/ge_generator.cc +++ b/src/ge/generator/ge_generator.cc @@ -22,10 +22,13 @@ #include "common/util.h" #include "framework/common/debug/ge_log.h" #include "ge/ge_api.h" +#include "graph/ge_context.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/graph_manager.h" +#include "graph/manager/util/rt_context_util.h" #include "graph/opsproto_manager.h" #include "graph/utils/graph_utils.h" +#include "graph/utils/type_utils.h" #include "model/ge_model.h" #include "init/gelib.h" @@ -108,7 +111,7 @@ static Status CheckEngineTypeSupport(const OpDescPtr &op_desc, OpEngineType engi return FAILED; } -static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t 
index, +static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTensorDesc &tensor, int32_t index, bool attr) { GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); @@ -122,6 +125,17 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const if (data_op == nullptr) { return FAILED; } + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); + auto input_desc = op_desc->MutableInputDesc(index); + GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID); + ge::Format old_format = input_desc->GetFormat(); + if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { + input_desc->SetFormat(FORMAT_ND); + input_desc->SetOriginFormat(FORMAT_ND); + (void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format)); + (void)AttrUtils::SetBool(data_op, "_is_single_op", true); + } GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); @@ -139,10 +153,21 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const } static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, const vector &outputs) { - OpDescPtr op_desc = MakeShared(NODE_NAME_NET_OUTPUT, NETOUTPUT); + OpDescPtr op_desc = MakeShared(graph->GetName() + "_" + NODE_NAME_NET_OUTPUT, NETOUTPUT); if (op_desc == nullptr) { return FAILED; } + auto single_op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID); + auto output_desc = single_op_desc->MutableOutputDesc(0); + GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID); + ge::Format old_format = output_desc->GetFormat(); + if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) { + output_desc->SetFormat(FORMAT_ND); + 
output_desc->SetOriginFormat(FORMAT_ND); + (void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format)); + (void)AttrUtils::SetBool(op_desc, "_is_single_op", true); + } int32_t count = 0; for (const auto &out_desc : outputs) { GeTensorDesc tensor = out_desc.GetTensorDesc(); @@ -187,6 +212,19 @@ static void GetOpsProtoPath(string &opsproto_path) { opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); } +static string GetModelNameFromFileName(const string &file_name_prefix) { + int start_position = 0; + // using output as model_name (ignore ".om") + int filename_suffixes = 3; + if (file_name_prefix.find_last_of('/') != string::npos) { + start_position = file_name_prefix.find_last_of('/') + 1; + } + int end_position = file_name_prefix.length() - filename_suffixes; + string model_name = file_name_prefix.substr(start_position, end_position - start_position); + GELOGI("Get model_name from file, model_name:%s", model_name.c_str()); + return model_name; +} + class GeGenerator::Impl { public: Status BuildModel(const Graph &graph, const vector &inputs, GraphId &graph_id, GeRootModelPtr &ge_models); @@ -278,24 +316,28 @@ Status GeGenerator::GenerateInfershapeGraph(const Graph &graph) { } return ret; } - GELOGI("GenerateInfershapeGraph success."); + GELOGI("Generate infer shape graph success"); return SUCCESS; } Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector &inputs, ModelBufferData &model, bool is_offline) { + rtContext_t ctx = nullptr; + auto rt = rtCtxGetCurrent(&ctx); + if (rt != RT_ERROR_NONE) { + GELOGW("Current ctx is null."); + } else { + ge::RtContextUtil::GetInstance().SetNormalModeContext(ctx); + } GraphId graph_id; GeRootModelPtr ge_root_model = nullptr; GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); - // using output as model_name (ignore ".om") - int start_position = file_name_prefix.find_last_of('/') + 1; - int end_position = file_name_prefix.length() - 
3; - const string model_name = file_name_prefix.substr(start_position, end_position - start_position); + const string model_name = GetModelNameFromFileName(file_name_prefix); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!"); impl_->is_offline_ = is_offline; Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model); if (ret != SUCCESS) { - GELOGE(ret, "Build model failed"); + GELOGE(ret, "Build model failed."); if (impl_->graph_manager_.Finalize() != SUCCESS) { GELOGE(FAILED, "graph_manager finalize fail."); } @@ -316,6 +358,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr } return ret; } + + if (RtContextUtil::GetInstance().GetNormalModeContext() != nullptr) { + (void)rtCtxSetCurrent(RtContextUtil::GetInstance().GetNormalModeContext()); + } + GELOGI("GenerateOfflineModel success."); return SUCCESS; } @@ -325,11 +372,11 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in bool is_offline) { GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID); if (!inputs.empty() && (inputs.size() != op_desc->GetInputsSize())) { - GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size:%zu", inputs.size(), op_desc->GetInputsSize()); + GELOGE(PARAM_INVALID, "Tensor size: %zu, Inputs size: %zu", inputs.size(), op_desc->GetInputsSize()); return PARAM_INVALID; } if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) { - GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size:%zu", outputs.size(), op_desc->GetOutputsSize()); + GELOGE(PARAM_INVALID, "Tensor size: %zu, Outputs size: %zu", outputs.size(), op_desc->GetOutputsSize()); return PARAM_INVALID; } @@ -368,7 +415,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in } } else { for (const auto &in_desc : inputs) { - const GeTensorDesc input_desc = in_desc.GetTensorDesc(); + GeTensorDesc input_desc = in_desc.GetTensorDesc(); GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, 
input_desc, arg_index, true)); arg_index++; } @@ -382,7 +429,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in // dump ComputeGraph. compute_graph->Dump(); Graph graph = ge::GraphUtils::CreateGraphFromComputeGraph(compute_graph); - GELOGI("ATC parser success in single op schedule."); + GELOGI("ATC parser success in single op build."); GraphId graph_id; GeRootModelPtr ge_root_model = nullptr; @@ -394,7 +441,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); map name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; - GELOGD("The opType in op_desc_tmp is: %s", op_desc_tmp->GetType().c_str()); + GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); GE_CHK_STATUS_RET_NOLOG(impl_->SaveModel(model_file_name, ge_model, model_buff)); return SUCCESS; @@ -411,7 +458,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in */ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name) { - GELOGI("Start to Build Single Op Offline Model."); + GELOGI("Start to build single op offline model."); ModelBufferData model_buff; OpEngineType engine_type = ENGINE_SYS; return BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); @@ -430,7 +477,7 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, OpEngineType engine_type, ModelBufferData &model_buff) { - GELOGI("Start to Build Single Op Online"); + GELOGI("Start to build single op online"); return BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); } @@ -449,7 +496,7 @@ Status 
GeGenerator::Impl::SaveModel(const string &file_name_prefix, GeModelPtr & model_helper.SetSaveMode(is_offline_); Status ret = model_helper.SaveToOmModel(model, save_param_, file_name_prefix, model_buff); if (ret != SUCCESS) { - GELOGE(ret, "Save to Om model failed"); + GELOGE(ret, "Save to om model failed"); return ret; } return SUCCESS; @@ -461,16 +508,22 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector const std::map options; Status ret = graph_manager_.AddGraph(id, graph, options); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph failed, id: %u", id); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "GraphManager add graph fail, graph id: %u", id); (void)graph_manager_.Finalize(); return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; } - GELOGI("models inputs.size()=%zu", inputs.size()); + GELOGI("Model inputs size is %zu", inputs.size()); graph_manager_.SetOptionsRunGraphFlag(false); - ret = graph_manager_.BuildGraph(id, inputs, ge_root_model); + struct timeval tv; + if (gettimeofday(&tv, nullptr) != 0) { + GELOGE(INTERNAL_ERROR, "get the time of day failed."); + return INTERNAL_ERROR; + } + uint64_t session_id = static_cast(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us + ret = graph_manager_.BuildGraph(id, inputs, ge_root_model, session_id); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph failed, id: %u", id); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager build graph fail, graph id: %u", id); return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; } @@ -485,14 +538,14 @@ Status GeGenerator::Impl::GenerateInfershapeGraph(const Graph &graph, GraphId &g const std::map options; Status ret = graph_manager_.AddGraph(id, graph, options); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, "graphManager add graph failed, id: %u", id); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED, 
"GraphManager add graph failed, graph id: %u", id); (void)graph_manager_.Finalize(); return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; } ret = graph_manager_.GenerateInfershapeGraph(id); if (ret != SUCCESS) { - GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager BuildGraph failed, id: %u", id); + GELOGE(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, "GraphManager generate graph failed"); return GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED; } diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index 6d908155..602b71bd 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -160,10 +160,10 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block) { parent->child_offset_ += child->block_size_; child->deleted_block_ = true; GELOGI( - "Add block stream id:%ld [size:%zu, life time[begin:%zu, end:%zu]] to" - " block[size:%zu, life time[begin:%zu, end:%zu]]", - stream_id_, child->block_size_, child->GetLifeBegin(), child->GetLifeEnd(), parent->block_size_, - parent->GetLifeBegin(), parent->GetLifeEnd()); + "Add block[%p size:%zu, stream id:%ld life time[begin:%zu, end:%zu]] to" + " block[%p size:%zu, stream id:%ld, life time[begin:%zu, end:%zu]]", + child, child->block_size_, child->stream_id_, child->GetLifeBegin(), child->GetLifeEnd(), parent, + parent->block_size_, parent->stream_id_, parent->GetLifeBegin(), parent->GetLifeEnd()); } } @@ -499,17 +499,17 @@ void BlockMemAssigner::InitReuseFlag() { bool pre_reuse_flag = true; bool post_reuse_flag = true; for (auto &node_index_io : pair.second) { - if (node_index_io.io_type == kIn) { + if (node_index_io.io_type_ == kIn) { continue; } - OutDataAnchorPtr out_anchor = node_index_io.node->GetOutDataAnchor(node_index_io.index); + OutDataAnchorPtr out_anchor = node_index_io.node_->GetOutDataAnchor(node_index_io.index_); if (out_anchor == nullptr) { continue; } bool out_flg = false; - if 
(node_index_io.node->GetOutDataNodes().empty()) { + if (node_index_io.node_->GetOutDataNodes().empty()) { out_flg = true; } for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { @@ -643,7 +643,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, CanReuseByStream(map_iter->second, *reusable_block)) { GELOGD("Cross stream mem reuse, target stream:%ld, current stream:%ld", reusable_block->stream_id_, stream_id); - reusable_block->AddNodeTypeIndex({n, mem_type, out_index}, real_size, no_align_size); + reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); if (iter != anchor_to_symbol_.end()) { @@ -660,7 +660,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } } - auto block = new (std::nothrow) MemoryBlock(block_size, is_reuse_memory); + auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); // Data and netoutput need zero copy block @@ -688,7 +688,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, auto node_op_desc = n->GetOpDesc(); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); MemoryBlock *block = nullptr; - NodeIndexIO node_index_io = NodeIndexIO(n, index, kOut); + NodeIndexIO node_index_io(n, index, kOut); int64_t size = 0; auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc != nullptr) { @@ -701,7 +701,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, if (IsSymbolExist(node_index_io)) { std::string symbol = anchor_to_symbol_[node_index_io.ToString()]; block = symbol_blocks_[symbol]; - block->AddNodeTypeIndex({n, kOutput, index}, size, no_align_size); + 
block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); block->ref_count_++; } else { int64_t max_size = size; @@ -749,7 +749,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, GELOGI("Get dst_reuse_input_index failed")); if (dst_reuse_input && (dst_reuse_input_index == static_cast(in_anchor->GetIdx()))) { - block->AddNodeTypeIndex({owner_node, kOutput, i}, block->Size(), block->Size()); + block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); out_count_reuse_input += 1; reuse_input = true; } @@ -775,31 +775,6 @@ bool IsOutputBlock(const ge::InDataAnchorPtr &in_data_anchor) { return false; } -// current node's output uses previous node's output memory -bool IsReferencePreviousNodeOutputMemory(const ge::NodePtr &node, uint32_t output_index) { - // Get the reference type of the node, default is false - bool is_ref = false; - // If GetBool fail, is_ref is false. 
- auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - return false; - } - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); - if (!is_ref) { - return false; - } - const string &output_name = op_desc->GetOutputNameByIndex(output_index); - for (const auto &input_name : op_desc->GetAllInputNames()) { - if (!input_name.empty() && output_name == input_name) { - int input_index = op_desc->GetInputIndexByName(input_name); - GELOGI("Reference memory:name[%s] output[%s][%u] ref to input[%s][%d] ", op_desc->GetName().c_str(), - output_name.c_str(), output_index, input_name.c_str(), input_index); - return true; - } - } - return false; -} - // atomic out memory will be reassigned bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool is_atomic, bool out_node_set_continuous_input) { @@ -920,58 +895,57 @@ void CheckAndGetOpReuseEnv(const string &env, vector &env_vec, bool &op_ } Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector &ranges) { - auto node_op_desc = node->GetOpDesc(); - int64_t stream_id = node_op_desc->GetStreamId(); + auto op_desc = node->GetOpDesc(); + int64_t stream_id = op_desc->GetStreamId(); vector memorys_type; - bool has_mem_type_attr = ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type); - GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", node_op_desc->GetName().c_str(), - node_op_desc->GetOutputsSize(), memorys_type.size()); - if (has_mem_type_attr && (memorys_type.size() != node_op_desc->GetOutputsSize())) { + bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type); + GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", op_desc->GetName().c_str(), + op_desc->GetOutputsSize(), memorys_type.size()); + if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size 
err[outputsize:%zu, memorysize:%zu]", - node_op_desc->GetName().c_str(), node_op_desc->GetOutputsSize(), memorys_type.size()); + op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size()); return INTERNAL_ERROR; } is_op_reuse_mem_ = true; if (op_reuse_env_valid_ == true) { vector::iterator it_name = - std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), node_op_desc->GetName()); + std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); vector::iterator it_type = - std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), node_op_desc->GetType()); + std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetType()); GE_IF_BOOL_EXEC(it_name != op_no_reuse_mem_vec_.end() || it_type != op_no_reuse_mem_vec_.end(), is_op_reuse_mem_ = false;); } bool is_atomic = false; // If GetBool fail, is_atomic is false. - (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); + (void)ge::AttrUtils::GetBool(op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic); // Allocate memory for the current node and release node memory of the same size in the workspace GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", ReleaseMemorys(stream_workspace_blocks_[stream_id], reusable_blocks_);) - for (uint32_t i = 0; i < static_cast(node_op_desc->GetOutputsSize()); i++) { + for (uint32_t i = 0; i < static_cast(op_desc->GetOutputsSize()); i++) { int64_t size = 0; - auto output_op_desc = node_op_desc->GetOutputDescPtr(i); + auto output_op_desc = op_desc->GetOutputDescPtr(i); if (output_op_desc != nullptr) { GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } // fusion: other type's size not means malloc HBM memory bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; if (l1_flag) { - GELOGI("fusion: node[%s], output[%s], output memory type [%d]", node_op_desc->GetName().c_str(), - 
node_op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); + GELOGI("fusion: node[%s], output[%s], output memory type [%d]", op_desc->GetName().c_str(), + op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); size = 0; } std::string peer_name; uint32_t peer_input_index = 0; bool out_node_set_continuous_input = false; - bool no_need_assign_memory = - ((size == 0) || CheckIsZeroMemNodeType(node->GetType()) || IsReferencePreviousNodeOutputMemory(node, i)); + bool no_need_assign_memory = ((size == 0) || CheckIsZeroMemNodeType(node->GetType())); if (!no_need_assign_memory) { out_node_set_continuous_input = IsOutNodeSetContinuousInput(node, i, peer_name, peer_input_index); no_need_assign_memory = IsAtomicOutputMemory(node, i, is_atomic, out_node_set_continuous_input); } if (no_need_assign_memory) { - zero_memory_list_.emplace_back(node, kOutput, i); + zero_memory_list_.emplace_back(node, kOutput, i, false); continue; } // atomic can't be reused @@ -1049,7 +1023,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { workspace_skip_flag = true; } if (temp[i] == 0 || workspace_skip_flag) { - zero_memory_list_.emplace_back(n, kWorkspace, static_cast(i)); + zero_memory_list_.emplace_back(n, kWorkspace, static_cast(i), false); continue; } MemoryBlock *mem_block = ApplyMemory(GetBlockSize(static_cast(temp[i]), ranges), @@ -1067,7 +1041,9 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { (void)mem_block; // Fix warning } - GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), MergeDynamicBatchBlocks();) + bool merge_dynamic_batch = false; + GE_IF_BOOL_EXEC(!(ge_disable_reuse_mem_env_ == "1"), merge_dynamic_batch = MergeDynamicBatchBlocks();) + GE_IF_BOOL_EXEC(!merge_dynamic_batch, ReuseBlocksByLifeTime();) AssignContinuousBlocks(); ResizeMemoryBlocks(); @@ -1131,7 +1107,8 @@ void MergeBlocks(std::vector &dest, std::vector &s } } -void BlockMemAssigner::MergeDynamicBatchBlocks() { +bool BlockMemAssigner::MergeDynamicBatchBlocks() { + bool 
merged = false; std::map> dynamic_batch_blocks; for (auto block : memory_blocks_) { if (block == nullptr) { @@ -1160,8 +1137,10 @@ void BlockMemAssigner::MergeDynamicBatchBlocks() { if (it != it_max) { GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str()); MergeBlocks(it_max->second, it->second); + merged = true; } } + return merged; } // asending order @@ -1331,9 +1310,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, siz } GELOGI( "[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]" - " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d].", + " noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d] isref[%d].", graph_name.c_str(), op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, - op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block); + op_desc->GetStreamId(), block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, + node_type.ref_input); } void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) { @@ -1528,6 +1508,7 @@ void BlockMemAssigner::FindDependentStreamBetweenGraphs(const NodePtr &pre_node, bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || (node_type == HCOMBROADCAST) || (node_type == HCOMALLREDUCE) || (node_type == CONSTANTOP) || - (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT); + (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || + (node_type == HVDCALLBACKBROADCAST) || (node_type == HVDCALLBACKALLREDUCE); } } // namespace ge diff --git a/src/ge/graph/build/memory/block_mem_assigner.h b/src/ge/graph/build/memory/block_mem_assigner.h index 7382fc72..14aba576 100644 --- 
a/src/ge/graph/build/memory/block_mem_assigner.h +++ b/src/ge/graph/build/memory/block_mem_assigner.h @@ -23,6 +23,7 @@ #include #include #include +#include #include "common/ge_inner_error_codes.h" #include "common/types.h" #include "common/util.h" @@ -36,13 +37,14 @@ const size_t kMaxLifeTime = 0xffffffff; enum MemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index) - : node(std::move(node)), mem_type(mem_type), index(index) {} + NodeTypeIndex(ge::NodePtr node, MemoryType mem_type, uint32_t index, bool ref_input = false) + : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} ge::NodePtr node = nullptr; MemoryType mem_type = kOutput; uint32_t index = 0; size_t life_time_end = kMaxLifeTime; + bool ref_input = false; const string GetMemType() const { if (mem_type == kOutput) { return "output"; @@ -55,9 +57,9 @@ struct NodeTypeIndex { class MemoryBlock { public: - explicit MemoryBlock(size_t block_size, bool reuse_mem = true) + explicit MemoryBlock(size_t block_size, int64_t stream_id = 0, bool reuse_mem = true) : ref_count_(0), - stream_id_(0), + stream_id_(stream_id), deleted_block_(false), reuse_mem_(reuse_mem), input_index_(0), @@ -81,7 +83,7 @@ class MemoryBlock { void Init(size_t real_size, MemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) { real_size_list_.emplace_back(real_size); no_align_size_list_.emplace_back(no_align_size); - node_type_index_list_.emplace_back(node, type, out_index); + node_type_index_list_.emplace_back(node, type, out_index, false); } size_t Size() const { return block_size_; } @@ -129,6 +131,7 @@ class MemoryBlock { bool continuous_block_; bool last_continuous_block_; bool is_zero_copy_; + std::map depend_stream_life_; private: size_t block_size_; @@ -287,7 +290,7 @@ class BlockMemAssigner : public MemAssigner { std::vector zero_memory_list_; // ref mapping - std::map> symbol_to_anchors_; + std::map> 
symbol_to_anchors_; std::map anchor_to_symbol_; std::map pre_reuse_flag_; std::map post_reuse_flag_; @@ -371,10 +374,10 @@ class BlockMemAssigner : public MemAssigner { /// /// @ingroup GE /// @brief Merge memory blocks between different batchs - /// @return void + /// @return merge or not /// @author /// - void MergeDynamicBatchBlocks(); + bool MergeDynamicBatchBlocks(); void AssignContinuousBlocks(); diff --git a/src/ge/graph/build/memory/module.mk b/src/ge/graph/build/memory/module.mk new file mode 100644 index 00000000..2b77e40e --- /dev/null +++ b/src/ge/graph/build/memory/module.mk @@ -0,0 +1,98 @@ +LOCAL_PATH := $(call my-dir) + + +local_lib_src_files := memory_assigner.cc \ + graph_mem_assigner.cc \ + binary_block_mem_assigner.cc \ + block_mem_assigner.cc \ + hybrid_mem_assigner.cc \ + max_block_mem_assigner.cc \ + var_mem_assign_util.cc \ + +local_lib_inc_path := ${LOCAL_PATH} \ + ${TOPDIR}inc \ + ${TOPDIR}inc/external \ + ${TOPDIR}inc/external/graph \ + $(TOPDIR)libc_sec/include \ + ${TOPDIR}third_party/protobuf/include \ + ${TOPDIR}inc/framework \ + $(TOPDIR)framework/domi \ + +#compiler for host +include $(CLEAR_VARS) +LOCAL_MODULE := libge_memory + +LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -O2 +ifeq ($(DEBUG), 1) +LOCAL_CFLAGS += -g -O0 +endif + +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libge_common \ + +LOCAL_SRC_FILES := $(local_lib_src_files) + +generated_sources_dir := $(call local-generated-sources-dir) +LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH) +LOCAL_C_INCLUDES := $(local_lib_inc_path) +LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS) + +include ${BUILD_HOST_STATIC_LIBRARY} + + +#compiler for device +include $(CLEAR_VARS) +LOCAL_MODULE := libge_memory + +LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY +LOCAL_CFLAGS += -O2 +LOCAL_LDFLAGS 
:= + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libge_common \ + +LOCAL_SRC_FILES := $(local_lib_src_files) + +generated_sources_dir := $(call local-generated-sources-dir) +LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH) +LOCAL_C_INCLUDES := $(local_lib_inc_path) +LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS) + +include ${BUILD_STATIC_LIBRARY} + +#compiler for llt +include $(CLEAR_VARS) +LOCAL_MODULE := libge_memory + +LOCAL_CFLAGS += -std=c++11 +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +LOCAL_SHARED_LIBRARIES := libprotobuf \ + libc_sec \ + libslog \ + libgraph \ + libge_common \ + +LOCAL_SRC_FILES := $(local_lib_src_files) + +generated_sources_dir := $(call local-generated-sources-dir) +LOCAL_EXPORT_C_INCLUDE_DIRS := $(generated_sources_dir)/proto/$(LOCAL_PATH) +LOCAL_C_INCLUDES := $(local_lib_inc_path) +LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS) + +include ${BUILD_LLT_STATIC_LIBRARY} diff --git a/src/ge/graph/build/model_builder.cc b/src/ge/graph/build/model_builder.cc index a3ecc63c..62abd4ab 100644 --- a/src/ge/graph/build/model_builder.cc +++ b/src/ge/graph/build/model_builder.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "common/ge/ge_util.h" #include "framework/common/debug/ge_log.h" #include "graph/anchor.h" @@ -250,7 +251,7 @@ Status ModelBuilder::SetInputOutputDesc() { } // if user set input node format ND, the expected node for data and netoutput format is ND in // final graph. 
- if ((domi::GetContext().format == domi::DOMI_TENSOR_ND) && + if ((domi::GetContext().format == domi::DOMI_TENSOR_ND) && (!node_op_desc->HasAttr("_is_single_op")) && ((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) { GELOGI("The node [%s] format should be set ND.", node_op_desc->GetName().c_str()); auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr(); @@ -521,11 +522,37 @@ Status ModelBuilder::MergeWeights() { } if (weight_data.data() != nullptr) { GE_IF_BOOL_EXEC(base_addr == nullptr, GELOGE(FAILED, "Base addr is nullptr."); return FAILED); - GE_CHK_BOOL_EXEC( - memcpy_s(base_addr + offset, weight_offset_ - offset, weight_data.data(), weight_data.size()) == EOK, - return FAILED, "call memcpy_s failed."); + if (weight_offset_ - offset < weight_data.size()) { + GELOGE(FAILED, "left weight size not enough. left_size:%lu, weight_size:%lu", weight_offset_ - offset, + weight_data.size()); + return FAILED; + } + uintptr_t dst_ptr = (uintptr_t)base_addr + offset; + uintptr_t src_ptr = (uintptr_t)weight_data.data(); + size_t left_size = weight_data.size(); + while (left_size > SECUREC_MEM_MAX_LEN) { + auto err = memcpy_s(reinterpret_cast(dst_ptr), SECUREC_MEM_MAX_LEN, reinterpret_cast(src_ptr), + SECUREC_MEM_MAX_LEN); + if (err != EOK) { + GELOGE(FAILED, + "mem copy failed. errret:%u, " + "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", + err, dst_ptr, SECUREC_MEM_MAX_LEN, src_ptr, SECUREC_MEM_MAX_LEN); + return FAILED; + } + left_size -= SECUREC_MEM_MAX_LEN; + dst_ptr = dst_ptr + SECUREC_MEM_MAX_LEN; + src_ptr = src_ptr + SECUREC_MEM_MAX_LEN; + } + auto err = memcpy_s(reinterpret_cast(dst_ptr), left_size, reinterpret_cast(src_ptr), left_size); + if (err != EOK) { + GELOGE(FAILED, + "mem copy failed. 
errret:%u, " + "dst_ptr:%lx, dst_size:%lu, src_ptr:%lx, src_size:%lu", + err, dst_ptr, left_size, src_ptr, left_size); + return FAILED; + } } - weight_data.clear(); } diff --git a/src/ge/graph/build/stream_allocator.cc b/src/ge/graph/build/stream_allocator.cc index 318134bd..f6323434 100644 --- a/src/ge/graph/build/stream_allocator.cc +++ b/src/ge/graph/build/stream_allocator.cc @@ -683,7 +683,7 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { GELOGE(FAILED, "SplitStreams:streamid(%ld) > last_stream_id(%ld)", stream_id, last_stream_id); return FAILED; } - stream_node_num_vec[stream_id]++; + AddNodeNum(cur_node, stream_node_num_vec[stream_id]); stream_2_nodes_map[stream_id].push_back(cur_node); // The maximum number of tasks per stream. int64_t max_node_num_one_stream = GetMaxNodeNumPerStream(cur_node, max_task_count); @@ -706,7 +706,8 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { "It's time to split the stream, split newly-added stream id is %ld", stream_id, stream_node_num_vec[stream_id], max_node_num_one_stream, last_stream_id); NodePtr pre_node = pre_node_vec[stream_id]; - stream_node_num_vec[stream_id] = 1; + stream_node_num_vec[stream_id] = 0; + AddNodeNum(cur_node, stream_node_num_vec[stream_id]); // try spilt a new stream and move same continuous stream label nodes from this stream bool not_use_cur = false; NodePtr not_cur = nullptr; @@ -720,7 +721,7 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { auto stored_op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(stored_op_desc); stored_op_desc->SetStreamId(last_stream_id); - stream_node_num_vec[stream_id]++; + AddNodeNum(node, stream_node_num_vec[stream_id]); } not_use_cur = true; not_cur = nodes.front(); @@ -1055,7 +1056,7 @@ Status StreamAllocator::CollectDeactiveStream(const OpDescPtr &op_desc, std::set // Insert StreamActive Op for Entry Stream. 
Status StreamAllocator::InsertActiveEntryStream(const std::vector &active_streams, int64_t stream_id) { - string node_name = "ActiveEntryStream_" + string(STREAMACTIVE); + string node_name = whole_graph_->GetName() + "_ActiveEntryStream_" + string(STREAMACTIVE); OpDescPtr op_desc = ge::MakeShared(node_name, STREAMACTIVE); if (op_desc == nullptr) { GELOGE(FAILED, "Failed to new opdesc."); @@ -1143,7 +1144,7 @@ Status StreamAllocator::InsertSyncEventNodes() { GE_CHECK_NOTNULL(node->GetInControlAnchor()); GE_CHECK_NOTNULL(node->GetOutControlAnchor()); for (auto &event_id : recv_event_id_list) { - string recv_node_name = "_Recv_" + to_string(event_id); + string recv_node_name = whole_graph_->GetName() + "_Recv_" + to_string(event_id); OpDescPtr op_desc_ptr = MakeShared(recv_node_name, RECV); GE_CHECK_NOTNULL(op_desc_ptr); @@ -1171,7 +1172,7 @@ Status StreamAllocator::InsertSyncEventNodes() { GetSendEventIdList(node, send_event_id_list); for (auto &event_id : send_event_id_list) { - string send_node_name = "_Send_" + to_string(event_id); + string send_node_name = whole_graph_->GetName() + "_Send_" + to_string(event_id); OpDescPtr op_desc_ptr = MakeShared(send_node_name, SEND); GE_CHECK_NOTNULL(op_desc_ptr); @@ -1291,6 +1292,15 @@ int64_t StreamAllocator::GetMaxNodeNumPerStream(const NodePtr &node, uint32_t ma return max_node_num_one_stream; } +void StreamAllocator::AddNodeNum(const NodePtr &node, int64_t &node_num) { + node_num++; + vector events; + GetSendEventIdList(node, events); + node_num += static_cast(events.size()); + GetRecvEventIdList(node, events); + node_num += static_cast(events.size()); +} + // Insert send event id on a node void StreamAllocator::AddSendEventId(const NodePtr &node, uint32_t event_id) { node_to_send_events_[node].emplace_back(event_id); diff --git a/src/ge/graph/build/stream_allocator.h b/src/ge/graph/build/stream_allocator.h index 528c22a9..ae79430a 100644 --- a/src/ge/graph/build/stream_allocator.h +++ 
b/src/ge/graph/build/stream_allocator.h @@ -80,6 +80,7 @@ class StreamAllocator { Status GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stream_count, uint32_t &max_task_count); int64_t GetMaxNodeNumPerStream(const NodePtr &node, uint32_t max_node_num_one_stream); + void AddNodeNum(const NodePtr &node, int64_t &node_num); void AddSendEventId(const NodePtr &node, uint32_t event_id); void AddRecvEventId(const NodePtr &node, uint32_t event_id); diff --git a/src/ge/graph/build/task_generator.cc b/src/ge/graph/build/task_generator.cc index ec6bf584..2ce4e89d 100644 --- a/src/ge/graph/build/task_generator.cc +++ b/src/ge/graph/build/task_generator.cc @@ -47,6 +47,7 @@ const char *const kIsOutputVar = "OUTPUT_IS_VAR"; const char *const kProfilingMode = "PROFILING_MODE"; const char *const kProfilingFpPoint = "FP_POINT"; const char *const kProfilingBpPoint = "BP_POINT"; +const char *const kOffOptimize = "off_optimize"; const uint32_t kProfilingArStep = 2; const uint64_t kProfilingFpStartLogid = 1; const uint64_t kProfilingBpEndLogid = 2; @@ -83,10 +84,10 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t } Status ret = SUCCESS; if (is_unknown_shape) { - GELOGI("Beign to generate unknown shape task."); + GELOGI("Beign to generate unknown shape task. Graph name is %s.", graph->GetName().c_str()); ret = GenerateUnknownShapeTask(run_context, graph, task_def_list, op_name_map); } else { - GELOGI("Beign to generate known shape task."); + GELOGI("Beign to generate known shape task. 
Graph name is %s.", graph->GetName().c_str()); ret = GenerateTask(run_context, graph, task_def_list, op_name_map); } GE_DUMP(graph, "GenerateTaskAfter"); @@ -108,7 +109,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t GELOGE(FAILED, "SetListStr failed."); return FAILED); - GELOGI("Call GenerateTask Success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), + GELOGI("Generate task success, task_def_list.size:%zu, op_name_map.size:%zu", task_def_list.size(), op_name_map.size()); // Init and serialize model_task_def @@ -130,7 +131,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t return ret; } - GELOGI("Get TaskInfo success. session_id=%lu", session_id); + GELOGI("Get TaskInfo success. session id is %lu", session_id); return SUCCESS; } @@ -253,7 +254,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GenerateTask failed."); return GE_CLI_GE_NOT_INITIALIZED; } - GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed."); + GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed."); ProfilingPoint profiling_point; vector all_reduce_nodes; GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); @@ -263,9 +264,9 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra GE_TIMESTAMP_CALLNUM_START(GenerateTask); // map store fusion nodes map> fusion_nodes; - string buffer_optimize = "off_optimize"; + string buffer_optimize = kOffOptimize; (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); - if (buffer_optimize != "off_optimize") { + if (buffer_optimize != kOffOptimize) { GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); } std::unordered_set fusion_nodes_seen; @@ -371,7 +372,7 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG GELOGE(GE_CLI_GE_NOT_INITIALIZED, 
"GenerateTask failed."); return GE_CLI_GE_NOT_INITIALIZED; } - GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "MarkNodeAndSetIndex failed."); + GE_CHK_STATUS_RET(MarkNodeAndSetIndex(graph), "Mark node and set index failed."); ProfilingPoint profiling_point; vector all_reduce_nodes; GE_CHK_STATUS_RET(FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes)); @@ -381,9 +382,9 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG GE_TIMESTAMP_CALLNUM_START(GenerateTask); // map store fusion nodes map> fusion_nodes; - string buffer_optimize = "off_optimize"; + string buffer_optimize = kOffOptimize; (void)ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize); - if (buffer_optimize != "off_optimize") { + if (buffer_optimize != kOffOptimize) { GE_CHK_STATUS_RET(SaveFusionNodes(fusion_nodes, graph)); } std::unordered_set fusion_nodes_seen; @@ -392,7 +393,11 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG rtStream_t stream = nullptr; GE_CHK_RT_RET(rtStreamCreate(&stream, 0)); run_context.stream = stream; - GE_CHK_RT_RET(rtModelBindStream(run_context.model, stream, 0)); + if (rtModelBindStream(run_context.model, stream, 0) != RT_ERROR_NONE) { + GELOGE(FAILED, "Call rt api failed."); + GE_CHK_RT(rtStreamDestroy(stream)); + return FAILED; + } for (auto &node : graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -437,7 +442,7 @@ Status TaskGenerator::GenerateUnknownShapeTask(RunContext &run_context, ComputeG size_t task_list_size_before = task_def_list.size(); GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); - GELOGI("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), + GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id); 
GE_TIMESTAMP_RESTART(GenerateTask); auto ret = kernel_info_store->GenerateTask(*node, run_context, task_def_list); @@ -659,14 +664,15 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { Status TaskGenerator::MarkFirstAndLastOps(const vector &ops, bool is_single_stream) const { vector> continuous_op_lists(1); - const set label_op_types({LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX}); + const set separator_types( + {LABELSET, LABELGOTO, LABELGOTOEX, LABELSWITCH, LABELSWITCHBYINDEX, STREAMSWITCH, STREAMSWITCHN}); for (auto &op_desc : ops) { bool attr_notask = false; if (ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOTASK, attr_notask) && attr_notask) { continue; } string op_type = op_desc->GetType(); - if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || label_op_types.count(op_type) != 0)) { + if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) { continuous_op_lists.emplace_back(vector()); } else { continuous_op_lists.back().emplace_back(op_desc); @@ -727,7 +733,6 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP fp_op_desc = in_node_desc; } } - GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId()); break; } } @@ -736,6 +741,7 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP GELOGW("not find fp_op_desc."); return SUCCESS; } + GELOGI("Find fp_op_desc is %s, id is %ld", fp_op_desc->GetName().c_str(), fp_op_desc->GetId()); for (auto &node : graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); diff --git a/src/ge/graph/execute/graph_execute.cc b/src/ge/graph/execute/graph_execute.cc index 4173706a..9293b9af 100644 --- a/src/ge/graph/execute/graph_execute.cc +++ b/src/ge/graph/execute/graph_execute.cc @@ -86,6 +86,17 @@ Status GraphExecutor::SetGraphContext(GraphContextPtr graph_context_ptr) { return SUCCESS; } 
+Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector &batch_num) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->SetDynamicSize(model_id, batch_num); + if (ret != SUCCESS) { + GELOGE(FAILED, "SetDynamicSize failed"); + return ret; + } + return SUCCESS; +} + void GraphExecutor::SetTrainFlag(bool is_train_graph) { train_graph_flag_ = is_train_graph; } Status GraphExecutor::FreeInOutBuffer() { @@ -476,7 +487,28 @@ Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector &batch_info) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetCurShape(model_id, batch_info); + if (ret != SUCCESS) { + GELOGE(FAILED, "GetCurShape failed"); + return ret; + } + return SUCCESS; +} +Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetModelAttr(model_id, dynamic_output_shape_info); + if (ret != SUCCESS) { + GELOGE(FAILED, "GetModelAttr failed"); + return ret; + } return SUCCESS; } @@ -503,4 +535,43 @@ Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vecto return SUCCESS; } + +Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info); + if (ret != SUCCESS) { + GELOGE(ret, "GetAIPPInfo failed."); + return ret; + } + + return SUCCESS; +} + +Status GraphExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetOrigInputInfo(model_id, index, 
orig_input_info); + if (ret != SUCCESS) { + GELOGE(ret, "GetOrigInputInfo failed."); + return ret; + } + + return SUCCESS; +} + +Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, + std::vector &input_dims, + std::vector &output_dims) { + auto model_manager = ge::ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->GetAllAippInputOutputDims(model_id, index, input_dims, output_dims); + if (ret != SUCCESS) { + GELOGE(ret, "GetAllAippInputOutputDims failed."); + return ret; + } + + return SUCCESS; +} + } // namespace ge diff --git a/src/ge/graph/execute/graph_execute.h b/src/ge/graph/execute/graph_execute.h index 9d4ecc24..ae467515 100644 --- a/src/ge/graph/execute/graph_execute.h +++ b/src/ge/graph/execute/graph_execute.h @@ -56,6 +56,8 @@ class GraphExecutor { Status SetGraphContext(GraphContextPtr graph_context_ptr); + static Status SetDynamicSize(uint32_t model_id, const std::vector &batch_num); + void SetTrainFlag(bool is_train_graph); const std::vector &GetOutputsDesc() const { return outputs_desc_; } @@ -71,6 +73,8 @@ class GraphExecutor { vector &output_desc, std::vector &input_formats, std::vector &output_formats); + static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + /// /// @ingroup ge /// @brief Get dynamic batch_info @@ -80,10 +84,17 @@ class GraphExecutor { /// static Status GetDynamicBatchInfo(uint32_t model_id, std::vector> &batch_info); + static Status GetCurShape(const uint32_t model_id, std::vector &batch_info); + + static Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector &input_desc, vector &output_desc, std::vector &input_formats, std::vector &output_formats); + static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); + static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t 
index, std::vector &input_dims, + std::vector &output_dims); private: Status PrepareInputData(const std::vector &input_tensor, InputData &graph_input_data, diff --git a/src/ge/graph/label/while_label_maker.cc b/src/ge/graph/label/while_label_maker.cc index 55b5dfb2..6601abd1 100644 --- a/src/ge/graph/label/while_label_maker.cc +++ b/src/ge/graph/label/while_label_maker.cc @@ -98,7 +98,7 @@ Status WhileOpLabelMaker::Run(uint32_t &label_index) { return FAILED; } - NodePtr cond_out_node = cond_graph->FindNode(NODE_NAME_NET_OUTPUT); + NodePtr cond_out_node = cond_graph->FindFirstNodeMatchType(NETOUTPUT); GE_CHECK_NOTNULL(cond_out_node); OpDescPtr cond_out_desc = cond_out_node->GetOpDesc(); GE_CHECK_NOTNULL(cond_out_desc); diff --git a/src/ge/graph/load/new_model_manager/aipp_utils.cc b/src/ge/graph/load/new_model_manager/aipp_utils.cc new file mode 100644 index 00000000..e7ae922c --- /dev/null +++ b/src/ge/graph/load/new_model_manager/aipp_utils.cc @@ -0,0 +1,90 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/load/new_model_manager/aipp_utils.h" + +#include + +#include "common/debug/log.h" +#include "common/op/ge_op_utils.h" +#include "framework/common/util.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/attr_utils.h" + +#include "framework/common/debug/ge_log.h" + +namespace ge { +#define AIPP_CONVERT_TO_AIPP_INFO(KEY) aipp_info.KEY = aipp_params->KEY() + +#define AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(KEY, INDEX) \ + do { \ + if (aipp_params->KEY##_size() > 0) { \ + aipp_info.KEY = aipp_params->KEY(INDEX); \ + } \ + } while (0) + +Status AippUtils::ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info) { + GE_CHECK_NOTNULL(aipp_params); + AIPP_CONVERT_TO_AIPP_INFO(input_format); + AIPP_CONVERT_TO_AIPP_INFO(src_image_size_w); + AIPP_CONVERT_TO_AIPP_INFO(src_image_size_h); + AIPP_CONVERT_TO_AIPP_INFO(crop); + AIPP_CONVERT_TO_AIPP_INFO(load_start_pos_w); + AIPP_CONVERT_TO_AIPP_INFO(load_start_pos_h); + AIPP_CONVERT_TO_AIPP_INFO(crop_size_w); + AIPP_CONVERT_TO_AIPP_INFO(crop_size_h); + AIPP_CONVERT_TO_AIPP_INFO(resize); + AIPP_CONVERT_TO_AIPP_INFO(resize_output_w); + AIPP_CONVERT_TO_AIPP_INFO(resize_output_h); + AIPP_CONVERT_TO_AIPP_INFO(padding); + AIPP_CONVERT_TO_AIPP_INFO(left_padding_size); + AIPP_CONVERT_TO_AIPP_INFO(right_padding_size); + AIPP_CONVERT_TO_AIPP_INFO(top_padding_size); + AIPP_CONVERT_TO_AIPP_INFO(bottom_padding_size); + AIPP_CONVERT_TO_AIPP_INFO(csc_switch); + AIPP_CONVERT_TO_AIPP_INFO(rbuv_swap_switch); + AIPP_CONVERT_TO_AIPP_INFO(ax_swap_switch); + AIPP_CONVERT_TO_AIPP_INFO(single_line_mode); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c0, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c1, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r0c2, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c0, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c1, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r1c2, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c0, 0); + 
AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c1, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(matrix_r2c2, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_0, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_1, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(output_bias_2, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_0, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_1, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(input_bias_2, 0); + AIPP_CONVERT_TO_AIPP_INFO(mean_chn_0); + AIPP_CONVERT_TO_AIPP_INFO(mean_chn_1); + AIPP_CONVERT_TO_AIPP_INFO(mean_chn_2); + AIPP_CONVERT_TO_AIPP_INFO(mean_chn_3); + AIPP_CONVERT_TO_AIPP_INFO(min_chn_0); + AIPP_CONVERT_TO_AIPP_INFO(min_chn_1); + AIPP_CONVERT_TO_AIPP_INFO(min_chn_2); + AIPP_CONVERT_TO_AIPP_INFO(min_chn_3); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_0, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_1, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_2, 0); + AIPP_CONVERT_TO_AIPP_INFO_WITH_INDEX(var_reci_chn_3, 0); + return SUCCESS; +} +} // namespace ge diff --git a/src/ge/graph/load/new_model_manager/aipp_utils.h b/src/ge/graph/load/new_model_manager/aipp_utils.h new file mode 100644 index 00000000..2534b9fb --- /dev/null +++ b/src/ge/graph/load/new_model_manager/aipp_utils.h @@ -0,0 +1,48 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_ +#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_ + +#include + +#include "common/ge_inner_error_codes.h" +#include "common/ge_types.h" +#include "graph/op_desc.h" +#include "proto/insert_op.pb.h" + +using std::vector; + +namespace ge { +const uint32_t kAippOriginInputIndex = 0; +const uint32_t kAippInfoNum = 6; +const uint32_t kAippInfoFormat = 0; +const uint32_t kAippInfoDataType = 1; +const uint32_t kAippInfoTensorName = 2; +const uint32_t kAippInfoTensorSize = 3; +const uint32_t kAippInfoDimNum = 4; +const uint32_t kAippInfoShape = 5; + +class AippUtils { + public: + AippUtils() = default; + ~AippUtils() = default; + + static Status ConvertAippParams2AippInfo(domi::AippOpParams *aipp_params, AippConfigInfo &aipp_info); +}; +} // namespace ge + +#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_AIPP_UTILS_H_ diff --git a/src/ge/graph/load/new_model_manager/data_dumper.cc b/src/ge/graph/load/new_model_manager/data_dumper.cc index db9318ec..47f6ffcf 100644 --- a/src/ge/graph/load/new_model_manager/data_dumper.cc +++ b/src/ge/graph/load/new_model_manager/data_dumper.cc @@ -35,7 +35,6 @@ namespace { const uint32_t kAicpuLoadFlag = 1; const uint32_t kAicpuUnloadFlag = 0; -const uint32_t kTimeBufferLen = 80; const char *const kDumpOutput = "output"; const char *const kDumpInput = "input"; const char *const kDumpAll = "all"; @@ -190,18 +189,6 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin } } -static std::string GetCurrentTime() { - std::time_t now = std::time(nullptr); - std::tm *ptm = std::localtime(&now); - if (ptm == nullptr) { - return ""; - } - char buffer[kTimeBufferLen] = {0}; - // format: 20171122042550 - std::strftime(buffer, kTimeBufferLen, "%Y%m%d%H%M%S", ptm); - return std::string(buffer); -} - Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) { GELOGI("Start dump output"); if (inner_dump_info.is_task) { @@ -384,10 
+371,9 @@ Status DataDumper::LoadDumpInfo() { } aicpu::dump::OpMappingInfo op_mapping_info; - std::string time_now = GetCurrentTime(); - GELOGI("Time is %s now", time_now.c_str()); - op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + time_now + "/" + - std::to_string(device_id_) + "/"); + + auto dump_path = PropertiesManager::Instance().GetDumpOutputPath(); + op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/"); op_mapping_info.set_model_name(model_name_); op_mapping_info.set_model_id(model_id_); op_mapping_info.set_flag(kAicpuLoadFlag); diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index a0e88f3c..46dd8201 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -80,6 +80,7 @@ const uint32_t kOutputNum = 1; const uint32_t kTrueBranchStreamNum = 1; const uint32_t kThreadNum = 16; const uint32_t kAddrLen = sizeof(void *); +const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel"; const int kDecimal = 10; const int kBytes = 8; const uint32_t kDataMemAlignSizeCompare = 64; @@ -579,6 +580,14 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size auto ret = DoTaskSink(); GE_TIMESTAMP_END(DoTaskSink, "GraphLoader::DoTaskSink"); + /// In zero copy model, if an aicpu operator is connected to the first or last layer, before model execution, + /// the aicpu operator needs to destroy history record, and update operator memory address. + /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel().
+ if (MarkSpecifiedAicpuKernel() != SUCCESS) { + GELOGE(FAILED, "Mark model with specified aicpu operators failed."); + return FAILED; + } + // collect profiling for ge if (ProfilingManager::Instance().ProfilingOn()) { std::vector compute_graph_desc_info; @@ -593,6 +602,82 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size return ret; } +/// +/// @ingroup ge +/// @brief Travel all nodes and determine if destruction is required. +/// @return bool +/// +bool DavinciModel::IsAicpuKernelConnectSpecifiedLayer() { + Graph graph = ge_model_->GetGraph(); + ComputeGraphPtr compute_graph = GraphUtils::GetComputeGraph(graph); + auto all_nodes = compute_graph->GetAllNodes(); + for (auto &node : all_nodes) { + GE_IF_BOOL_EXEC(node == nullptr, continue); + OpDescPtr op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(op_desc == nullptr, continue); + + int64_t imply_type = -1; + (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, imply_type); + if (imply_type != static_cast(domi::ImplyType::AI_CPU)) { + continue; + } + GELOGD("Current operator imply type is %ld, name is %s.", imply_type, op_desc->GetName().c_str()); + + for (auto &in_data_anchor : node->GetAllInDataAnchors()) { + GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue); + auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); + auto peer_node = peer_out_data_anchor->GetOwnerNode(); + GE_IF_BOOL_EXEC(peer_node == nullptr, continue); + auto peer_op_desc = peer_node->GetOpDesc(); + GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); + if (IsDataOp(peer_op_desc->GetType())) { + GELOGI("Mark specified aicpu operator connected to data."); + return true; + } + } + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue); + auto peer_in_data_anchors = out_data_anchor->GetPeerInDataAnchors(); + for (auto &peer_in_data_anchor : peer_in_data_anchors) { + 
GE_IF_BOOL_EXEC(peer_in_data_anchor == nullptr, continue); + auto peer_node = peer_in_data_anchor->GetOwnerNode(); + GE_IF_BOOL_EXEC(peer_node == nullptr, continue); + auto peer_op_desc = peer_node->GetOpDesc(); + GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); + if (peer_op_desc->GetType() == NETOUTPUT) { + GELOGI("Mark specified aicpu operator connected to netoutput."); + return true; + } + } + } + } + + return false; +} +/// +/// @ingroup ge +/// @brief mark ge model with specified aicpu operators . +/// @return Status +/// +Status DavinciModel::MarkSpecifiedAicpuKernel() { + bool result = IsAicpuKernelConnectSpecifiedLayer(); + if (!result) { + // No aicpu operator needing destroy. + GELOGD("No specified aicpu operator that connects to data or netoutput."); + return SUCCESS; + } + + bool ret = ge::AttrUtils::SetBool(ge_model_, kNeedDestroySpecifiedAicpuKernel, result); + if (!ret) { + GELOGW("Add attr[%s] in ge model failed, and may lead to specified aicpu operators destruction failure.", + kNeedDestroySpecifiedAicpuKernel); + } + GELOGI("Mark ge model success, the model has specified aicpu operators, ge model name: %s.", + ge_model_->GetName().c_str()); + return SUCCESS; +} + /// /// @ingroup ge /// @brief Travel all nodes and do some init. @@ -1002,8 +1087,6 @@ Status DavinciModel::BindInputQueue() { /// @ingroup ge /// @brief definiteness queue schedule, bind input queue to task. /// @param [in] queue_id: input queue id from user. -/// @param [in] addr: Data Op output tensor address. -/// @param [in] size: Data Op output tensor size. 
/// @return: 0 for success / others for failed Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { GELOGI("Set CpuKernel model dequeue task enter."); @@ -1266,10 +1349,76 @@ Status DavinciModel::GetDynamicBatchInfo(std::vector> &batc } break; } + return SUCCESS; +} + +/// +/// @ingroup ge +/// @brief Get AIPP input info +/// @param [in] index +/// @param [out] aipp_info +/// @return execute result +/// +Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) { + GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); + OpDescPtr data_op = data_op_list_[index]; + if (!data_op->HasAttr(ATTR_NAME_AIPP)) { + GELOGE(GE_AIPP_NOT_EXIST, "GetAIPPInfo: there is not AIPP related with index %u.", index); + return GE_AIPP_NOT_EXIST; + } + + std::unique_ptr aipp_params(new (std::nothrow) domi::AippOpParams()); + GE_CHECK_NOTNULL(aipp_params); + + ge::GeAttrValue::NAMED_ATTRS aipp_attr; + GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, + "Data node do not contain param aipp!"); + GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); + GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u", + data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank()); + if (aipp_params->aipp_mode() == domi::AippOpParams::dynamic) { + GELOGI("GetAIPPInfo, dynamic Aipp is not support to query temporarily."); + return GE_DYNAMIC_AIPP_NOT_SUPPORT_QUERY; + } + + GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info), + "convert aipp params to aipp config info failed"); return SUCCESS; } +void DavinciModel::SetDynamicSize(const std::vector &batch_num) { + batch_size_.clear(); + if (batch_num.empty()) { + GELOGD("User has not set dynammic data"); + } + for (size_t i = 0; i < batch_num.size(); i++) { + 
batch_size_.emplace_back(batch_num[i]); + } +} + +void DavinciModel::GetCurShape(std::vector &batch_info) { + if (batch_size_.empty()) { + GELOGD("User does not set dynamic size"); + } + for (size_t i = 0; i < batch_size_.size(); i++) { + GELOGI("Start to get current shape"); + batch_info.emplace_back(batch_size_[i]); + } +} + +void DavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { + for (auto &op : output_op_list_) { + if (op->GetType() != NETOUTPUT) { + continue; + } + GELOGI("Start to get dynamic output dims attr"); + if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { + GELOGD("Can not get dynamic output dims attr"); + } + } +} + Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector &input_desc, vector &output_desc, std::vector &input_formats, @@ -1299,7 +1448,7 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector &input_desc, std::vector &formats) { - for (std::size_t index = 0; index < data_op_list_.size(); ++index) { + for (size_t index = 0; index < data_op_list_.size(); ++index) { InputOutputDescInfo input; GE_CHECK_NOTNULL(data_op_list_[index]); GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0)); @@ -1495,7 +1644,14 @@ Status DavinciModel::SinkModelProfile() { // Model Header string name = this->Name(); int32_t name_len = name.size(); - reporter_data.deviceId = device_id_; + // phy device id + uint32_t phy_device_id = 0; + rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); + return FAILED; + } + reporter_data.deviceId = phy_device_id; reporter_data.data = (unsigned char *)&name_len; reporter_data.dataLen = sizeof(int32_t); GE_CHK_BOOL_EXEC(reporter->Report(&reporter_data) == SUCCESS, return FAILED, "Reporter data fail, model id:%u.", @@ -1671,7 +1827,13 @@ Status DavinciModel::SinkTimeProfile(const InputData &current_data) {
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, return FAILED, "Sink model tag memcpy error."); // device id - reporter_data.deviceId = device_id_; + uint32_t phy_device_id = 0; + rtError_t rt_ret = rtGetDevicePhyIdByIndex(device_id_, &phy_device_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "runtime get phy_device_id failed, current phy_device_id:%d", phy_device_id); + return FAILED; + } + reporter_data.deviceId = phy_device_id; // Model Header string name = this->Name(); @@ -2637,8 +2799,10 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 } if (input_size > op_size) { - GELOGE(FAILED, "Input size [%u] can not be bigger than op size [%u]", input_size, op_size); - return false; + GELOGW( + "Input size [%u] is bigger than om size need [%u]," + "MAY cause inference result ERROR, please check model input", + input_size, op_size); } bool is_dynamic_aipp = false; for (const auto &op_desc : data_op_list_) { @@ -2707,14 +2871,18 @@ Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &outp /// Status DavinciModel::UpdateIoTaskArgs(const map> &data_info, bool is_input, const vector &blobs, bool is_dynamic, const string &batch_label) { + string input_or_output = "input"; + is_input ? input_or_output = "input" : input_or_output = "output"; if (blobs.size() != data_info.size()) { - GELOGE(FAILED, "Blobs not match: blobs=%zu datas=%zu", blobs.size(), data_info.size()); + GELOGE(FAILED, "Verify %s data num failed: model requires %zu, but user actually feeds %zu", + input_or_output.c_str(), data_info.size(), blobs.size()); return FAILED; } for (const auto &data : data_info) { if (data.first >= blobs.size()) { // check data index. 
- GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u", blobs.size(), data_info.size(), data.first); + GELOGE(FAILED, "Verify %s data num failed: can not find No.%zu data, because user only feeds %zu", + input_or_output.c_str(), data.first, blobs.size()); return FAILED; } int64_t size = data.second.first; // size of tensor. @@ -3262,7 +3430,7 @@ void DavinciModel::PushHcclStream(rtStream_t value) { void DavinciModel::CreateHcclFollowStream(rtStream_t stream, int64_t remain_cap) { std::lock_guard lock(capacity_of_stream_mutex_); capacity_of_stream_.emplace_back(make_pair(stream, remain_cap)); -}; +} void DavinciModel::ReuseHcclFollowStream(int64_t remain_cap, int64_t &index) { std::lock_guard lock(capacity_of_stream_mutex_); @@ -3320,4 +3488,91 @@ Status DavinciModel::GetComputeGraphInfo(std::vector &comp return SUCCESS; } +Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) { + GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); + OpDescPtr data_op = data_op_list_[index]; + if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { + GELOGE(GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index); + return GE_AIPP_NOT_EXIST; + } + + vector inputs; + if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { + std::string input = inputs[kAippOriginInputIndex]; + GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str()); + std::vector infos = ge::StringUtils::Split(input, ':'); + if (infos.size() != kAippInfoNum) { + GELOGW("origin input str is invalid."); + } + orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); + orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); + orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); + } + + return SUCCESS; +} + +void 
DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info) { + GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str()); + std::vector infos = ge::StringUtils::Split(in_out_info, ':'); + if (infos.size() != kAippInfoNum) { + GELOGW("origin input str is invalid."); + } + dims_info.name = infos[kAippInfoTensorName]; + dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal); + dims_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); + + std::vector dims = ge::StringUtils::Split(infos[kAippInfoShape], ','); + for (const auto &dim : dims) { + if (dim.empty()) { + continue; + } + dims_info.dims.emplace_back(std::strtol(dim.c_str(), nullptr, kDecimal)); + } +} + +Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, + std::vector &output_dims) { + GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); + OpDescPtr data_op = data_op_list_[index]; + if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { + GELOGE(GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index); + return GE_AIPP_NOT_EXIST; + } + + vector inputs; + if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { + GELOGI("GetAllAippInputOutputDims: Data: %s has %u related aippInfo.", data_op->GetName().c_str(), inputs.size()); + for (auto it : inputs) { + InputOutputDims input_info; + ParseAIPPInfo(it, input_info); + input_dims.emplace_back(input_info); + GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str()); + + ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex); + int64_t data_input_size; + (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); + GELOGD( + "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: %s, 
" + "data_type: %s, shape: %s .", + index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, + TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), + TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), + formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); + } + } + + vector outputs; + if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { + for (auto it : outputs) { + InputOutputDims output_info; + ParseAIPPInfo(it, output_info); + output_dims.emplace_back(output_info); + GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str()); + } + } + + return SUCCESS; +} + } // namespace ge diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index cd532923..067fa112 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -34,6 +34,7 @@ #include "graph/load/new_model_manager/data_dumper.h" #include "graph/load/new_model_manager/data_inputer.h" #include "graph/load/new_model_manager/model_utils.h" +#include "graph/load/new_model_manager/aipp_utils.h" #include "graph/load/new_model_manager/zero_copy_task.h" #include "graph/model.h" #include "graph/node.h" @@ -294,6 +295,19 @@ class DavinciModel { /// Status GetDynamicBatchInfo(std::vector> &batch_info); + void GetCurShape(std::vector &batch_info); + + void GetModelAttr(std::vector &dynamic_output_shape_info); + + /// + /// @ingroup ge + /// @brief Get AIPP input info + /// @param [in] index + /// @param [out] aipp_info + /// @return execute result + /// + Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info); + /// /// @ingroup ge /// @brief Get model_id. 
@@ -407,6 +421,8 @@ class DavinciModel { void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector &outside_addrs, const void *info, void *args, size_t size, size_t offset); + void SetDynamicSize(const std::vector &batch_num); + bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } void SetProfileTime(ModelProcStage stage, int64_t endTime = 0); @@ -452,6 +468,10 @@ class DavinciModel { Status CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs); Status UpdateKnownZeroCopyAddr(vector &io_addrs, uint32_t args_offset); + Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); + Status GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, + std::vector &output_dims); + private: // memory address of weights uint8_t *weights_mem_base_; @@ -560,6 +580,10 @@ class DavinciModel { void UnbindTaskSinkStream(); + bool IsAicpuKernelConnectSpecifiedLayer(); + + Status MarkSpecifiedAicpuKernel(); + /// /// @ingroup ge /// @brief Travel all nodes and do some init. 
@@ -741,6 +765,8 @@ class DavinciModel { Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, std::vector &outputs); + void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); + bool is_model_has_inited_; uint32_t model_id_; uint32_t runtime_model_id_; @@ -856,6 +882,8 @@ class DavinciModel { void *args_host_ = nullptr; std::map knonw_input_data_info_; std::map knonw_output_data_info_; + + vector batch_size_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index 8171498a..8b17a35b 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -22,6 +22,8 @@ #include "common/profiling/profiling_manager.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" +#include "graph/debug/ge_attr_define.h" +#include "framework/common/util.h" #include "graph/load/new_model_manager/davinci_model.h" #include "graph/load/new_model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" @@ -31,6 +33,7 @@ thread_local uint32_t device_count = 0; namespace { const int kCmdParSize = 2; const int kDumpCmdPairSize = 2; +const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel"; } // namespace std::shared_ptr ModelManager::GetInstance() { @@ -39,7 +42,10 @@ std::shared_ptr ModelManager::GetInstance() { return instance_ptr; } -ModelManager::ModelManager() { max_model_id_ = 0; } +ModelManager::ModelManager() { + max_model_id_ = 0; + session_id_bias_ = 0; +} Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) { STR_FWK_OP_KERNEL param_base = {}; @@ -69,6 +75,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u GE_CHK_RT(rtFree(aicpu_kernel_addr)); 
return FAILED;) uint64_t kernel_id_addr = static_cast(reinterpret_cast(aicpu_kernel_addr)); param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; + // In the scene of loading once and running many times, the kernel needs to be destroyed many times, + // and cannot be removed from kernel map. } } @@ -213,6 +221,13 @@ Status ModelManager::SetDevice(int32_t deviceId) const { return SUCCESS; } +ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector &batch_num) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHECK_NOTNULL(davinci_model); + davinci_model->SetDynamicSize(batch_num); + return SUCCESS; +} + ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const shared_ptr &ge_root_model, const shared_ptr &listener) { auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model); @@ -616,7 +631,7 @@ Status ModelManager::HandleDumpCommand(const Command &command) { return FAILED; } if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { - dump_path += "/"; + dump_path = dump_path + "/" + CurrentTimeInStr() + "/"; } GELOGI("dump status = %s.", dump_path.c_str()); @@ -647,7 +662,6 @@ Status ModelManager::HandleDumpCommand(const Command &command) { Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { auto hybrid_model = GetHybridModel(model_id); if (hybrid_model != nullptr) { - // TODO hybrid use dynamic memory allocation max_size = 0; return SUCCESS; } @@ -694,6 +708,20 @@ Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vectorGetDynamicBatchInfo(batch_info); } +Status ModelManager::GetCurShape(const uint32_t model_id, std::vector &batch_info) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHECK_NOTNULL(davinci_model); + davinci_model->GetCurShape(batch_info); + return SUCCESS; +} + +Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { + std::shared_ptr davinci_model = GetModel(model_id); 
+ GE_CHECK_NOTNULL(davinci_model); + davinci_model->GetModelAttr(dynamic_output_shape_info); + return SUCCESS; +} + Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector &input_desc, vector &output_desc, std::vector &inputFormats, @@ -705,6 +733,52 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); } +/// +/// @ingroup ge +/// @brief Get AIPP info +/// @param [in] model_id +/// @param [in] index +/// @param [out] aipp_info +/// @return execute result +/// +Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", + model_id); + + return davinci_model->GetAIPPInfo(index, aipp_info); +} + +Status ModelManager::GenSessionId(uint64_t &session_id) { + std::lock_guard lock(session_id_create_mutex_); + + struct timeval tv; + if (gettimeofday(&tv, nullptr) != 0) { + GELOGE(INTERNAL_ERROR, "Failed to get current time."); + return INTERNAL_ERROR; + } + session_id = static_cast(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us + + session_id_bias_++; + // max bias 100. 
+ session_id_bias_ = session_id_bias_ % 100; + session_id = session_id * 100 + session_id_bias_; + + GELOGD("Generate new session id: %lu.", session_id); + return SUCCESS; +} + +Status ModelManager::UpdateSessionId(std::shared_ptr &davinci_model, uint64_t session_id) { + GeModelPtr ge_model_current = davinci_model->GetGeModel(); + GE_CHECK_NOTNULL(ge_model_current); + if (!ge::AttrUtils::SetInt(ge_model_current, ge::MODEL_ATTR_SESSION_ID, static_cast(session_id))) { + GELOGW("Set attr[%s] failed in updating session_id.", MODEL_ATTR_SESSION_ID.c_str()); + } + + GELOGD("Update session id: %lu.", session_id); + return SUCCESS; +} + Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, PARAM_INVALID, @@ -747,6 +821,15 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model } davinci_model->SetDeviceId(device_id); + /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. + /// These session_ids come from the same model, so the values of session_id are the same. + /// Update session_id for infer in load model to avoid the same session_id. 
+ uint64_t new_session_id; + ret = GenSessionId(new_session_id); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for infer failed."); + ret = UpdateSessionId(davinci_model, new_session_id); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for infer failed."); + ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "DavinciInit failed."); @@ -805,9 +888,17 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d return ret; } + /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. + /// These session_ids come from the same model, so the values of session_id are the same. + /// Update session_id for infer in load model to avoid the same session_id. + uint64_t new_session_id; + ret = GenSessionId(new_session_id); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); + ret = UpdateSessionId(davinci_model, new_session_id); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); + GenModelId(&model_id); davinci_model->SetId(model_id); - davinci_model->SetSessionId(model_id); ret = davinci_model->SetQueIds(input_queue_ids, output_queue_ids); if (ret != SUCCESS) { GELOGE(ret, "set model queue ids failed."); @@ -840,6 +931,22 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid Model ID %u to start! 
", model_id); + GeModelPtr ge_model_current = davinci_model->GetGeModel(); + bool need_destroy_aicpu_kernel = false; + bool result = ge::AttrUtils::GetBool(ge_model_current, kNeedDestroySpecifiedAicpuKernel, need_destroy_aicpu_kernel); + if (result && need_destroy_aicpu_kernel) { + GELOGI("Get attr %s successfully, start to destroy specified aicpu kernel.", kNeedDestroySpecifiedAicpuKernel); + + // Zero copy is enabled by default, no need to judge. + uint64_t session_id_davinci = davinci_model->GetSessionId(); + uint32_t model_id_davinci = davinci_model->GetModelId(); + Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci); + if (status != SUCCESS) { + GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci, + model_id_davinci); + } + } + Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data); if (status == SUCCESS) { GELOGI("Execute model %u success.", model_id); @@ -920,4 +1027,23 @@ void ModelManager::GenModelId(uint32_t *id) { std::lock_guard lock(map_mutex_); *id = ++max_model_id_; } + +Status ModelManager::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetOrigInputInfo failed, invalid model_id is %u.", + model_id); + + return davinci_model->GetOrigInputInfo(index, orig_input_info); +} + +Status ModelManager::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, + std::vector &input_dims, + std::vector &output_dims) { + std::shared_ptr davinci_model = GetModel(model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, + "GetAllAippInputOutputDims failed, invalid model_id is %u.", model_id); + + return davinci_model->GetAllAippInputOutputDims(index, input_dims, output_dims); +} + } // namespace ge diff --git a/src/ge/graph/load/new_model_manager/model_manager.h 
b/src/ge/graph/load/new_model_manager/model_manager.h index b79f388a..9a94e5c9 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -17,6 +17,7 @@ #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ +#include #include #include #include @@ -25,7 +26,6 @@ #include #include #include -#include #include "cce/aicpu_engine_struct.h" #include "common/ge_inner_error_codes.h" #include "common/ge_types.h" @@ -188,6 +188,16 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// ge::Status GetDynamicBatchInfo(const uint32_t model_id, std::vector> &batch_info); + /// + /// @ingroup ge + /// @brief Get AIPP info + /// @param [in] model_id + /// @param [in] index + /// @param [out] aipp_info + /// @return execute result + /// + ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + /// /// @ingroup domi_ome /// @brief set model input and output size zero copy @@ -202,8 +212,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::vector &inputFormats, std::vector &outputFormats); + ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info); + + ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); + ge::Status SetDevice(int32_t deviceId) const; + ge::Status SetDynamicSize(uint32_t model_id, const std::vector &batch_num); + /// /// @ingroup domi_ome /// @brief Get model according to given id @@ -226,6 +242,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); + ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); + + ge::Status GenSessionId(uint64_t &session_id); + + ge::Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, + std::vector &output_dims); + 
private: /// /// @ingroup domi_ome @@ -253,6 +276,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DeleteModel(uint32_t id); void GenModelId(uint32_t *id); + ge::Status UpdateSessionId(std::shared_ptr &davinci_model, uint64_t session_id); std::map> model_map_; std::map> hybrid_model_map_; @@ -260,6 +284,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { uint32_t max_model_id_; std::mutex map_mutex_; std::mutex sess_ids_mutex_; + std::mutex session_id_create_mutex_; + uint64_t session_id_bias_; std::set sess_ids_; }; } // namespace ge diff --git a/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index 8529b90f..0ee9727a 100644 --- a/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -177,6 +177,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + (void)rtStreamDestroy(stream); return RT_FAILED; } GELOGD("hccl_stream addr is=%p", stream); diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index 635fec5d..95580a15 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -67,6 +67,18 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return FAILED; } + const auto &ext_info = kernel_ex_def.kernel_ext_info(); + if (!ext_info.empty()) { + auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, + GELOGE(RT_FAILED, "rtMalloc ext_info 
error: 0x%X, size=%zu", rt_ret, ext_info.size()); + return FAILED;) + rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, + GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); + return FAILED;) + } + // 2.1 get loop cond variable for tensor array write uint64_t step_id_addr = 0; OpDescPtr step_id_node = davinci_model_->GetVariableOp(NODE_NAME_GLOBAL_STEP); @@ -77,7 +89,9 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin } } - auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID; + auto session_id = davinci_model_->GetSessionId(); + fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID = session_id; + // 2.2 Collect aicpu kernel uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID; GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS, @@ -97,8 +111,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = static_cast(reinterpret_cast(workspace_base_addr)); fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; - fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = 0; - fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = 0; + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size(); + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(ext_info_addr_); rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) @@ -149,8 +163,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = workspace_base_addr; fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = input_output_addr; 
fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; - fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = 0; - fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = 0; + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info.size(); + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(ext_info_addr_); // 4. Create session GE_CHECK_NOTNULL(ModelManager::GetInstance()); @@ -291,6 +305,15 @@ Status KernelExTaskInfo::Release() { input_output_addr_ = nullptr; } } + if (ext_info_addr_ != nullptr) { + rtError_t rt_ret = rtFree(ext_info_addr_); + if (rt_ret != RT_ERROR_NONE) { + GELOGW("rtFree ext_info_addr[%p] error, ret: 0x%X", ext_info_addr_, rt_ret); + ret = FAILED; + } else { + ext_info_addr_ = nullptr; + } + } return ret; } diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h index 8903a17c..ff8f3119 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h @@ -31,6 +31,7 @@ class KernelExTaskInfo : public TaskInfo { davinci_model_(nullptr), kernel_buf_(nullptr), input_output_addr_(nullptr), + ext_info_addr_(nullptr), dump_args_(nullptr) {} ~KernelExTaskInfo() override {} @@ -64,6 +65,7 @@ class KernelExTaskInfo : public TaskInfo { DavinciModel *davinci_model_; void *kernel_buf_; void *input_output_addr_; + void *ext_info_addr_; void *dump_args_; OpDescPtr op_desc_ = nullptr; uint32_t args_offset_ = 0; diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index df0ed5fd..390e4e99 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -414,6 +414,7 @@ Status KernelTaskInfo::Release() { FreeRtMem(&custom_info_.output_descs); 
FreeRtMem(&custom_info_.output_addrs); FreeRtMem(&custom_info_.attr_handle); + FreeRtMem(&aicpu_ext_info_addr_); if (ctx_.argsOffset != nullptr) { delete[] ctx_.argsOffset; @@ -792,6 +793,16 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } } + auto aicpu_param_head = reinterpret_cast(args_addr.get()); + const auto &ext_info = kernel_def.kernel_ext_info(); + auto init_ret = InitAicpuTaskExtInfo(ext_info); + if (init_ret != SUCCESS) { + GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); + return init_ret; + } + aicpu_param_head->extInfoAddr = reinterpret_cast(aicpu_ext_info_addr_); + aicpu_param_head->extInfoLength = reinterpret_cast(ext_info.size()); + // malloc device memory for args rtError_t rt_ret = rtMalloc(static_cast(&args_), args_size_, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { @@ -823,6 +834,24 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return SUCCESS; } +Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { + if (ext_info.empty()) { + return SUCCESS; + } + auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); + return FAILED; + } + rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); + return FAILED; + } + + return SUCCESS; +} + Status KernelTaskInfo::StoreInputOutputTensor(const std::vector &input_data_addrs, const std::vector &output_data_addrs, const std::vector<::tagCcAICPUTensor> &input_descs, diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h index e6753b10..41ed5728 100644 --- 
a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.h @@ -100,6 +100,8 @@ class KernelTaskInfo : public TaskInfo { Status InitAicpuTask(uint32_t op_index, const domi::KernelDef &kernel_def); + Status InitAicpuTaskExtInfo(const std::string &ext_info); + Status StoreInputOutputTensor(const std::vector &input_data_addrs, const std::vector &output_data_addrs, const std::vector<::tagCcAICPUTensor> &input_descs, @@ -152,6 +154,9 @@ class KernelTaskInfo : public TaskInfo { DavinciModel *davinci_model_; uint32_t args_offset_ = 0; + // aicpu ext_info device mem + void *aicpu_ext_info_addr_ = nullptr; + // For super kernel uint32_t skt_id_; std::string stub_func_name_; diff --git a/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc index 4c430ff9..d2ad474a 100644 --- a/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc +++ b/src/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc @@ -133,10 +133,11 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) rt_ret = rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); + GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) rt_ret = rtKernelConfigTransArg(hbm_nav_table_addr, sizeof(uint64_t), 0, &hbm_nav_table_addr_pys); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtKernelConfigTransArg failed. 
error: 0x%X", rt_ret); - return FAILED;) + GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) GELOGD("SKT: hbm_nav_table_addr %p, hbm_nav_table_addr_pys %p", hbm_nav_table_addr, hbm_nav_table_addr_pys); // Create the necessary metadata for the super kernel @@ -159,7 +160,8 @@ Status SuperKernelFactory::FuseKernels(const std::vector &stub_func_list GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc failed. error: 0x%X", rt_ret); return FAILED;) rt_ret = rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); - GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); return FAILED;) + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy failed. error: 0x%X", rt_ret); + GE_CHK_RT(rtFree(hbm_nav_table_addr)); return FAILED;) // Create the necessary metadata for the super kernel h = new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim); } diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index 9b0ceeb3..dd4855b6 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -41,20 +41,24 @@ #include "graph/ge_local_context.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/util/rt_context_util.h" +#include "graph/partition/dynamic_shape_partition.h" #include "graph/passes/addn_pass.h" #include "graph/passes/atomic_addr_clean_pass.h" #include "graph/passes/cast_remove_pass.h" #include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/compile_nodes_pass.h" +#include "graph/passes/cond_remove_pass.h" #include "graph/passes/constant_folding_pass.h" #include "graph/passes/constant_fuse_same_pass.h" #include "graph/passes/control_trigger_pass.h" +#include "graph/passes/ctrl_edge_transfer_pass.h" #include "graph/passes/dimension_adjust_pass.h" +#include "graph/passes/dimension_compute_pass.h" 
#include "graph/passes/flow_ctrl_pass.h" #include "graph/passes/hccl_group_pass.h" #include "graph/passes/hccl_memcpy_pass.h" -#include "graph/passes/identity_pass.h" #include "graph/passes/identify_reference_pass.h" +#include "graph/passes/identity_pass.h" #include "graph/passes/iterator_op_pass.h" #include "graph/passes/link_gen_mask_nodes_pass.h" #include "graph/passes/merge_pass.h" @@ -63,10 +67,11 @@ #include "graph/passes/permute_pass.h" #include "graph/passes/prune_pass.h" #include "graph/passes/replace_with_empty_const_pass.h" -#include "graph/passes/reshape_remove_pass.h" #include "graph/passes/reshape_recovery_pass.h" +#include "graph/passes/reshape_remove_pass.h" #include "graph/passes/same_transdata_breadth_fusion_pass.h" #include "graph/passes/subgraph_pass.h" +#include "graph/passes/switch_data_edges_bypass.h" #include "graph/passes/switch_dead_branch_elimination.h" #include "graph/passes/switch_logic_remove_pass.h" #include "graph/passes/switch_op_pass.h" @@ -76,14 +81,10 @@ #include "graph/passes/transop_symmetry_elimination_pass.h" #include "graph/passes/transop_without_reshape_fusion_pass.h" #include "graph/passes/transpose_transdata_pass.h" -#include "graph/passes/dimension_compute_pass.h" #include "graph/passes/variable_op_pass.h" #include "graph/passes/variable_prepare_op_pass.h" #include "graph/passes/variable_ref_delete_op_pass.h" #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" -#include "graph/passes/cond_remove_pass.h" -#include "graph/passes/ctrl_edge_transfer_pass.h" -#include "graph/partition/dynamic_shape_partition.h" #include "graph/utils/tensor_adapter.h" #include "inc/pass_manager.h" #include "init/gelib.h" @@ -369,14 +370,15 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetGraph(), inputs, compute_graph, session_id); GM_RUN_AND_DUMP("OptimizeOriginalGraph", graph_optimize_.OptimizeOriginalGraph, compute_graph); + GM_RUN_AND_DUMP("PrepareRunningFormatRefiner", 
graph_preparer_.PrepareRunningFormatRefiner); GM_RUN_AND_DUMP("RefineRunningFormat", graph_optimize_.OptimizeOriginalGraphJudgeInsert, compute_graph); + GE_RUN(GraphManager, graph_preparer_.RecordAIPPInfo, compute_graph); if (IsTailingOptimization()) { GM_RUN_AND_DUMP("OptimizeSwitchOp", graph_preparer_.SwitchOpOptimize, compute_graph); } GM_RUN_AND_DUMP("Optimize1", OptimizeStage1, compute_graph); GM_RUN_AND_DUMP("InferShape2", compute_graph->InferShapeInNeed); - // TODO: to be delete const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); if (unknown_shape_skip != nullptr) { PassManager graph_pass; @@ -423,7 +425,11 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: return ret; } } - ret = LoadGraph(ge_root_model, graph_node); + if (!graph_node->IsAsync()) { + ret = LoadGraph(ge_root_model, graph_node); + } else { + ret = LoadGraphAsync(ge_root_model, graph_node); + } if (ret != SUCCESS) { GELOGE(ret, "LoadGraph Failed."); return ret; @@ -432,7 +438,11 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId()); } else if (!graph_node->GetLoadFlag()) { GeRootModelPtr ge_root_model_ptr = graph_node->GetGeRootModel(); - ret = LoadGraph(ge_root_model_ptr, graph_node); + if (!graph_node->IsAsync()) { + ret = LoadGraph(ge_root_model_ptr, graph_node); + } else { + ret = LoadGraphAsync(ge_root_model_ptr, graph_node); + } if (ret != SUCCESS) { GELOGE(ret, "LoadGraph Failed."); return ret; @@ -587,7 +597,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector &inputs, - GeRootModelPtr &ge_root_model) { + GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id); if (inputs.empty()) { GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); @@ -712,15 +722,10 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const 
std::vectorGetGraphId()); return GE_GRAPH_ALREADY_RUNNING; } + graph_node->SetAsync(async); // set graph's run flag graph_node->SetRunFlag(true); - struct timeval tv; - if (gettimeofday(&tv, nullptr) != 0) { - GELOGE(INTERNAL_ERROR, "get the time of day failed."); - return INTERNAL_ERROR; - } - uint64_t session_id = static_cast(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); graph_node->SetRunFlag(false); if (ret != SUCCESS) { @@ -954,6 +959,9 @@ Status GraphManager::ParseOptions(const std::map &opti } options_.enable_print_op_pass = true; ret = ParseOption(options, ENABLE_PRINT_OP_PASS, options_.enable_print_op_pass); + + options_.is_single_op = false; + ret = ParseOption(options, SINGLE_OP_FLAG, options_.is_single_op); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.enablePrintOpPass value is invalid, must be 0 or 1."); return GE_GRAPH_OPTIONS_INVALID); @@ -1554,6 +1562,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GELOGI("get ge.exec.variable_acc failed. 
set default value."); } PassManager after_merge_passes; + GE_CHK_STATUS_RET( + after_merge_passes.AddPass("OptimizeStage1_1::SwitchDataEdgesBypass", new (std::nothrow) SwitchDataEdgesBypass)); GE_CHK_STATUS_RET( after_merge_passes.AddPass("OptimizeStage1_1::ConstantFuseSamePass", new (std::nothrow) ConstantFuseSamePass)); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::CommonSubexpressionEliminationPass", @@ -1579,8 +1589,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GE_IF_BOOL_EXEC(options == "default" || options == "1", GELOGI("turn on variable accelerator"); GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::VariableOpPass", new (std::nothrow) VariableOpPass(&var_acc_ctrl_)))) - GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::TransOpDepthFusionPass", new (std::nothrow) TransOpDepthFusionPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpWithoutReshapeFusionPass", new (std::nothrow) TransOpWithoutReshapeFusionPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", @@ -1660,7 +1668,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret); return ret; } - return SUCCESS; } @@ -1688,10 +1695,6 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); - HcclGroupPass hccl_group_pass; - if (IsTailingOptimization()) { - names_to_passes.emplace_back("HcclGroupPass", &hccl_group_pass); - } GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); @@ -1708,19 +1711,12 @@ Status 
GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { PassManager pass_for_control_attr_optimize; if (options_.train_graph_flag) { - // TODO: to be delete const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); if (unknown_shape_skip == nullptr) { GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::FlowCtrlPass", new (std::nothrow) FlowCtrlPass)) } } - // TODO: to be delete - const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); - if (unknown_shape_skip == nullptr) { - GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", - new (std::nothrow) SubgraphPass)); - } GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::MultiBatchPass", new (std::nothrow) MultiBatchPass)) @@ -1739,6 +1735,14 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::AfterMergePasses::AtomicAddrCleanPass", new (std::nothrow) AtomicAddrCleanPass)) + const char *unknown_shape_skip = std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION"); + if (unknown_shape_skip == nullptr) { + // SubgraphPass solves memory_assign_conflicts by insert MemcpyAsync node, which depends on multi attrs and + // graph-structure. So try not to add new pass after SubgraphPass. 
+ GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::SubgraphPass", + new (std::nothrow) SubgraphPass)); + } + GE_TIMESTAMP_START(pass_for_control_attr_optimize); ret = pass_for_control_attr_optimize.Run(compute_graph); GE_TIMESTAMP_END(pass_for_control_attr_optimize, "OptimizeStage2::ControlAttrOptimize"); @@ -1908,6 +1912,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G graph_node->SetRunFlag(false); return ret; } + graph_node->SetLoadFlag(true); ge_root_model->SetModelId(model_id_info.model_id); graph_node->SetGeRootModel(ge_root_model); } diff --git a/src/ge/graph/manager/graph_manager.h b/src/ge/graph/manager/graph_manager.h index dec88cdc..8ab28316 100644 --- a/src/ge/graph/manager/graph_manager.h +++ b/src/ge/graph/manager/graph_manager.h @@ -99,7 +99,8 @@ class GraphManager { /// @param [out] models build result /// @return Status result of function /// - ge::Status BuildGraph(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &models); + ge::Status BuildGraph(const GraphId &graph_id, const std::vector &inputs, GeRootModelPtr &models, + uint64_t session_id = 0, bool async = false); /// /// @ingroup ge_graph diff --git a/src/ge/graph/manager/graph_manager_utils.cc b/src/ge/graph/manager/graph_manager_utils.cc index dd5c5fbb..90f91c8e 100644 --- a/src/ge/graph/manager/graph_manager_utils.cc +++ b/src/ge/graph/manager/graph_manager_utils.cc @@ -40,6 +40,7 @@ GraphNode::GraphNode(GraphId graph_id) compute_graph_(nullptr), build_flag_(false), load_flag_(false), + async_(false), ge_model_(nullptr), sem_(1) { graph_run_async_listener_ = MakeShared(); diff --git a/src/ge/graph/manager/graph_manager_utils.h b/src/ge/graph/manager/graph_manager_utils.h index 746933a9..869d4a81 100644 --- a/src/ge/graph/manager/graph_manager_utils.h +++ b/src/ge/graph/manager/graph_manager_utils.h @@ -152,6 +152,9 @@ class GraphNode { bool GetRunFlag() const { return run_flag_; } void 
SetRunFlag(bool flag) { run_flag_ = flag; } + bool IsAsync() const { return async_; } + void SetAsync(bool flag) { async_ = flag; } + void SetSubGraph(std::vector &subgraph_ptr_list) { subgraph_ptr_list_ = subgraph_ptr_list; } const std::vector &GetAllSubGraph() const { return subgraph_ptr_list_; } @@ -181,6 +184,7 @@ class GraphNode { ComputeGraphPtr compute_graph_; bool build_flag_; bool load_flag_; + bool async_; GeModelPtr ge_model_; GeRootModelPtr ge_root_model_; BlockingQueue sem_; @@ -239,6 +243,7 @@ struct GraphManagerOptions { bool local_fmk_op_flag; bool hcom_parallel; bool enable_print_op_pass; + bool is_single_op; std::map stream_max_parallel_num; std::string output_datatype; std::string original_model_file; @@ -247,7 +252,7 @@ struct GraphManagerOptions { : stream_num(1), perf_level(domi::GEN_TASK_WITHOUT_FUSION), encrypt_mode(-1), - framework_type(domi::FMK_TYPE_T), + framework_type(domi::TENSORFLOW), ek_file(""), cert_file(""), hw_key_file(""), @@ -263,6 +268,7 @@ struct GraphManagerOptions { local_fmk_op_flag(false), hcom_parallel(false), enable_print_op_pass(true), + is_single_op(false), save_original_model("false") {} }; } // namespace ge diff --git a/src/ge/graph/manager/graph_var_manager.cc b/src/ge/graph/manager/graph_var_manager.cc index 813e9256..2982eb89 100644 --- a/src/ge/graph/manager/graph_var_manager.cc +++ b/src/ge/graph/manager/graph_var_manager.cc @@ -301,7 +301,7 @@ Status MemResource::AssignVarMem(const std::string &var_name, uint64_t size, uin return SUCCESS; } -int64_t MemResource::GetVarMemSize() const { return var_mem_size_; } +uint64_t MemResource::GetVarMemSize() const { return var_mem_size_; } void MemResource::UpdateVarMemSize(int64_t mem_size) { var_mem_size_ = mem_size; }; diff --git a/src/ge/graph/manager/graph_var_manager.h b/src/ge/graph/manager/graph_var_manager.h index 6229837c..be839eee 100644 --- a/src/ge/graph/manager/graph_var_manager.h +++ b/src/ge/graph/manager/graph_var_manager.h @@ -177,7 +177,7 @@ class 
MemResource { Status AssignVarMem(const std::string &var_name, uint64_t size, uint64_t session_id, size_t &mem_offset); - int64_t GetVarMemSize() const; + uint64_t GetVarMemSize() const; void UpdateVarMemSize(int64_t mem_size); diff --git a/src/ge/graph/manager/util/rt_context_util.h b/src/ge/graph/manager/util/rt_context_util.h index 006abb9f..93db9882 100644 --- a/src/ge/graph/manager/util/rt_context_util.h +++ b/src/ge/graph/manager/util/rt_context_util.h @@ -31,6 +31,10 @@ class RtContextUtil { void AddrtContext(rtContext_t context); + const rtContext_t GetNormalModeContext() const { return before_prerun_ctx_; } + + void SetNormalModeContext(rtContext_t context) { before_prerun_ctx_ = context; } + void DestroyrtContexts(); RtContextUtil &operator=(const RtContextUtil &) = delete; @@ -41,8 +45,8 @@ class RtContextUtil { ~RtContextUtil() {} std::vector rtContexts_; + rtContext_t before_prerun_ctx_ = nullptr; }; } // namespace ge #endif // GE_GRAPH_MANAGER_UTIL_RT_CONTEXT_UTIL_H_ - diff --git a/src/ge/graph/optimize/graph_optimize.cc b/src/ge/graph/optimize/graph_optimize.cc index f23ad110..b42c2e01 100644 --- a/src/ge/graph/optimize/graph_optimize.cc +++ b/src/ge/graph/optimize/graph_optimize.cc @@ -34,7 +34,7 @@ const char *const kAicoreEngine = "AIcoreEngine"; namespace ge { GraphOptimize::GraphOptimize() - : optimize_type_(domi::FrameworkType::FMK_TYPE_T), + : optimize_type_(domi::FrameworkType::TENSORFLOW), cal_config_(""), insert_op_config_(""), parse_out_node_(""), @@ -73,7 +73,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { src_index_list.emplace_back(peer_out_anchor->GetIdx()); node_op_desc->SetSrcName(src_name_list); node_op_desc->SetSrcIndex(src_index_list); - GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::FMK_TYPE_T), + GE_IF_BOOL_EXEC(!(node_op_desc->GetType() == NETOUTPUT && domi::GetContext().type == domi::TENSORFLOW), ge::NodePtr peer_owner_node = peer_out_anchor->GetOwnerNode(); 
input_name_list.emplace_back( peer_owner_node->GetName() + @@ -260,7 +260,7 @@ Status GraphOptimize::OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_ } Status GraphOptimize::SetOptions(const ge::GraphManagerOptions &options) { - if (options.framework_type >= static_cast(domi::FrameworkType::FMK_TYPE_RESERVED)) { + if (options.framework_type >= static_cast(domi::FrameworkType::FRAMEWORK_RESERVED)) { GELOGE(GE_GRAPH_OPTIONS_INVALID, "Optimize Type %d invalid.", options.framework_type); return GE_GRAPH_OPTIONS_INVALID; } @@ -293,7 +293,7 @@ void GraphOptimize::TranFrameOp(ComputeGraphPtr &compute_graph) { // set - framework_type // [No need to verify return value] op->SetType("FrameworkOp"); - if (!AttrUtils::SetInt(op, ATTR_NAME_FRAMEWORK_FWK_TYPE, domi::FrameworkType::FMK_TYPE_T)) { + if (!AttrUtils::SetInt(op, ATTR_NAME_FRAMEWORK_FWK_TYPE, domi::FrameworkType::TENSORFLOW)) { GELOGW("TranFrameOp SetInt ATTR_NAME_FRAMEWORK_FWK_TYPE failed"); } } diff --git a/src/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc b/src/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc new file mode 100644 index 00000000..be025730 --- /dev/null +++ b/src/ge/graph/optimize/optimizer/allreduce_fusion_pass.cc @@ -0,0 +1,397 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/optimize/optimizer/allreduce_fusion_pass.h" +#include +#include "common/debug/log.h" +#include "framework/common/debug/ge_log.h" +#include "common/types.h" +#include "common/util.h" +#include "graph/anchor.h" +#include "graph/node.h" +#include "graph/op_desc.h" +#include "graph/utils/attr_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/debug/ge_attr_define.h" +#include "hccl/base.h" +#include "hccl/hcom.h" + +namespace ge { +Status AllReducePass::Run(ge::ComputeGraphPtr graph) { + GELOGI("FusionAllReducePass: start"); + std::vector fusionOps; + std::vector inputGradientSize; + std::vector inputGradientTime; + + static const float inputGradientSizeTemp = 0.0; + static const float inputGradientTimeTemp = 0.0; + + // Get all nodes + for (auto nodePtr : graph->GetDirectNode()) { + GE_IF_BOOL_EXEC(nullptr == nodePtr, GELOGW("FusionAllReducePass: null node exists"); continue;); + + ge::OpDescPtr opDescPtr = nodePtr->GetOpDesc(); + GE_IF_BOOL_EXEC(nullptr == opDescPtr, + GELOGW("FusionAllReducePass: desc of node %s is null", nodePtr->GetName().c_str()); + continue;) + GE_IF_BOOL_EXEC(HCOMALLREDUCE == opDescPtr->GetType(), + // the op is allreduce and fusion > 0, then run fusion + std::int64_t hcom_fusion = 1; + GE_IF_BOOL_EXEC(!ge::AttrUtils::GetInt(opDescPtr, HCOM_ATTR_FUSION, hcom_fusion), + GELOGW("FusionAllReducePass: not get hcom_fusion from opDescPtr " + "by HCOM_ATTR_FUSION")); + GELOGI("after GetInt, hcom_fusion is :%ld", hcom_fusion); GE_IF_BOOL_EXEC( + hcom_fusion > 0, fusionOps.push_back(nodePtr); inputGradientSize.push_back(inputGradientSizeTemp); + inputGradientTime.push_back(inputGradientTimeTemp);)) + } + // The number of allreduce operator must be more than 1 + GE_IF_BOOL_EXEC(1 >= fusionOps.size(), GELOGW("FusionAllReducePass NOT_CHANGED: the graph has " + "%lu allreduce operator", + fusionOps.size()); + return NOT_CHANGED;); + + string group = "group"; + u32 gradientNum =
fusionOps.size(); + string model_name_str = graph->GetName(); + const char *model_name = model_name_str.c_str(); + model_feature modelFeature{model_name, gradientNum, inputGradientSize.data(), inputGradientTime.data()}; + + u32 segmentNum = 0; + u32 segmentIndex[HCCL_MAX_SEGMENT_NUM] = {}; + + // Call HCCL function: hcom_get_split_strategy + GELOGI("FusionAllReducePass: invoking hcom_get_split_strategy"); + GE_IF_BOOL_EXEC(HCCL_SUCCESS != hcom_get_split_strategy(group.c_str(), &modelFeature, HCCL_MAX_SEGMENT_NUM, + &segmentNum, segmentIndex), + GELOGE(FAILED, "FusionAllReducePass FAILED: the graph has %lu allreduce operator", fusionOps.size()); + return FAILED;) + GELOGI("FusionAllReducePass: invoke hcom_get_split_strategy successfully"); + + // check whether segmentNum is legal or not + GE_IF_BOOL_EXEC((HCCL_MAX_SEGMENT_NUM < segmentNum || 1 > segmentNum || segmentNum > gradientNum), + GELOGE(FAILED, + "FusionAllReducePass FAILED: illegal segmentNum=%u, " + "HCCL_MAX_SEGMENT_NUM=%u, gradientNum=%u", + segmentNum, HCCL_MAX_SEGMENT_NUM, gradientNum); + return FAILED;); + + // check whether segmentIndex is legal or not + GE_IF_BOOL_EXEC((segmentIndex[segmentNum - 1] != gradientNum - 1), + GELOGE(FAILED, + "FusionAllReducePass FAILED: illegal segmentIndex[0]=%u, " + "segmentIndex[segmentNum-1]=%u, gradientNum=%u", + segmentIndex[0], segmentIndex[(segmentNum)-1], gradientNum); + return FAILED;); + + for (uint32_t i = 0; i < segmentNum - 1; i++) { + GE_IF_BOOL_EXEC(segmentIndex[i] >= segmentIndex[i + 1], GELOGE(FAILED, + "FusionAllReducePass FAILED: illegal " + "segmentIndex[%u]=%u, segmentIndex[%u]=%u", + i, segmentIndex[i], i + 1, segmentIndex[i + 1]); + return FAILED;); + } + + // check whether fusion is needed or not + GE_IF_BOOL_EXEC( + segmentNum == gradientNum, + GELOGE(NOT_CHANGED, "FusionAllReducePass NOT_CHANGED: segmentNum=%u, gradientNum=%u", segmentNum, gradientNum); + return NOT_CHANGED;) + + std::unordered_set anchorPtrSet; + std::vector
fusionOpPeerOutDataAnchor; + std::vector fusionOpPeerOutDataToInControl; + std::vector fusionOpPeerOutControlAnchor; + std::vector> fusionOpPeerInDataAnchor; + std::vector> fusionOpPeerInControlFromOutData; + std::vector fusionOpPeerInControlAnchor; + ge::OutControlAnchorPtr previousNewAllreduceOutControlAnchor = nullptr; + + // Traversing the segmentNum + uint32_t start = 0; + uint32_t end = 0; + for (uint32_t segmentIdx = 0; segmentIdx < segmentNum; segmentIdx++) { + end = segmentIndex[segmentIdx]; + GE_IF_BOOL_EXEC(end - start < 1, + GELOGI("FusionAllReducePass: segmentIndex[%u]=%u", segmentIdx, segmentIndex[segmentIdx]); + start = end + 1; continue;); + + ge::OpDescPtr originDescPtr = fusionOps[start]->GetOpDesc(); + GE_CHECK_NOTNULL(originDescPtr); + ge::OpDescPtr newAllreduceDesc = AttrUtils::CloneOpDesc(originDescPtr); + GE_CHECK_NOTNULL(newAllreduceDesc); + + // Clear buffer + anchorPtrSet.clear(); + fusionOpPeerOutDataAnchor.clear(); + fusionOpPeerOutDataToInControl.clear(); + fusionOpPeerOutControlAnchor.clear(); + fusionOpPeerInDataAnchor.clear(); + fusionOpPeerInControlFromOutData.clear(); + fusionOpPeerInControlAnchor.clear(); + + // Traversing the Allreduce operators of each group + int outDataAnchorIndex = 0; + GE_CHK_STATUS_RET(GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[start]), + "Get peer outDataAnchor to inDataAnchor failed"); + + GE_CHK_STATUS_RET(GetPeerInAnchorToOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData, + fusionOps[start]), + "Get peer inDataAnchor and inControlAnchor to outDataAnchor failed"); + + GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[start]), + "Get peer outDataAnchor to inControlAnchor failed"); + GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[start]), + "Get peer outControlAnchor to inControlAnchor failed"); +
GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[start]), + "Get peer outControlAnchor from inControlAnchor failed"); + GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[start]), "FusionAllReducePass FAILED: remove node %s\n.", + fusionOps[start]->GetName().c_str()); + + for (uint32_t idx = start + 1; idx <= end; idx++) { + GE_CHK_STATUS_RET( + GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[idx], newAllreduceDesc), + "Get peer outDataAnchor to inDataAnchor failed"); + GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[idx]), + "Get peer outDataAnchor to inControlAnchor failed"); + GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[idx]), + "Get peer outControlAnchor to inControlAnchor failed"); + GE_CHK_STATUS_RET( + GetPeerAnchorFromOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData, + fusionOps[idx], newAllreduceDesc, outDataAnchorIndex), + "Get peerAnchor from outDataAnchor failed"); + GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[idx]), + "Get peer outControlAnchor from inControlAnchor failed"); + + // Delete the node + GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[idx]), "FusionAllReducePass FAILED: remove node %s\n.", + fusionOps[idx]->GetName().c_str()); + } + + NodePtr newAllReducePtr = graph->AddNode(newAllreduceDesc); + GE_CHECK_NOTNULL(newAllReducePtr); + // Link the inputDataAnchor + for (uint32_t i = 0; i < fusionOpPeerOutDataAnchor.size(); i++) { + GE_CHK_STATUS_RET( + GraphUtils::AddEdge(fusionOpPeerOutDataAnchor[i], newAllReducePtr->GetInDataAnchor(static_cast(i))), + "FusionAllReducePass FAILED: add input data edge failed"); + } + + // Link the inputControlAnchor + for (uint32_t i = 0; i < fusionOpPeerOutControlAnchor.size(); i++) { + 
GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutControlAnchor[i], newAllReducePtr->GetInControlAnchor()), + "FusionAllReducePass FAILED: add input control edge failed"); + } + + for (uint32_t i = 0; i < fusionOpPeerOutDataToInControl.size(); i++) { + GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutDataToInControl[i], newAllReducePtr->GetInControlAnchor()), + "FusionAllReducePass FAILED: add edge from out data to incontrol " + "failed"); + } + + // Link the outputDataAnchor + for (uint32_t i = 0; i < fusionOpPeerInDataAnchor.size(); i++) { + auto peerInDataAnchor = fusionOpPeerInDataAnchor[i].second; + GE_CHK_STATUS_RET( + GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInDataAnchor[i].first), peerInDataAnchor), + "FusionAllReducePass FAILED: add output data edge failed"); + } + for (uint32_t i = 0; i < fusionOpPeerInControlFromOutData.size(); i++) { + auto peerInControlAnchor = fusionOpPeerInControlFromOutData[i].second; + GE_CHK_STATUS_RET( + GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInControlFromOutData[i].first), + peerInControlAnchor), + "FusionAllReducePass FAILED: add edge from out data to in control " + "failed"); + } + + // Link the outputControlAnchor + for (uint32_t i = 0; i < fusionOpPeerInControlAnchor.size(); i++) { + GE_CHK_STATUS_RET(GraphUtils::AddEdge(newAllReducePtr->GetOutControlAnchor(), fusionOpPeerInControlAnchor[i]), + "FusionAllReducePass FAILED: add output control edge failed"); + } + + // Link the newAllreduce + if (segmentIdx > 0 && previousNewAllreduceOutControlAnchor != nullptr) { + GE_CHK_STATUS_RET( + GraphUtils::AddEdge(previousNewAllreduceOutControlAnchor, newAllReducePtr->GetInControlAnchor()), + "FusionAllReducePass FAILED: add input previous control edge failed"); + } + + previousNewAllreduceOutControlAnchor = newAllReducePtr->GetOutControlAnchor(); + start = end + 1; + } + + return SUCCESS; +} + +Status AllReducePass::GetPeerOutDataToInData(std::unordered_set &anchorSet, + 
vector &peerOutDataAnchorVec, + ge::NodePtr &srcNodePtr) { + for (auto inDataAnchor : srcNodePtr->GetAllInDataAnchors()) { + GE_IF_BOOL_EXEC(inDataAnchor == nullptr, continue;); + OutDataAnchorPtr peerOutDataAnchor = inDataAnchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peerOutDataAnchor == nullptr, continue;); + if (anchorSet.count(peerOutDataAnchor.get()) == 0) { + peerOutDataAnchorVec.push_back(peerOutDataAnchor); + anchorSet.insert(peerOutDataAnchor.get()); + GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataAnchor, inDataAnchor)); + } + } + return SUCCESS; +} + +Status AllReducePass::GetPeerInAnchorToOutData( + std::unordered_set &anchorSet, std::vector> &fusionOpPeerInDataAnchor, + std::vector> &fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr) { + for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) { + GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;); + for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) { + GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;); + if (anchorSet.count(peerInDataAnchor.get()) == 0) { + std::pair pairPeerInDataAnchor; + pairPeerInDataAnchor.first = 0; + pairPeerInDataAnchor.second = peerInDataAnchor; + fusionOpPeerInDataAnchor.push_back(pairPeerInDataAnchor); + anchorSet.insert(peerInDataAnchor.get()); + GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor)); + } + } + + for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) { + GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;); + if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) { + std::pair pairPeerInControlAnchorFromData; + pairPeerInControlAnchorFromData.first = 0; + pairPeerInControlAnchorFromData.second = peerInControlAnchorFromData; + fusionOpPeerInControlFromOutData.push_back(pairPeerInControlAnchorFromData); + anchorSet.insert(peerInControlAnchorFromData.get()); + GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData)); + } 
+ } + } + return SUCCESS; +} + +Status AllReducePass::GetPeerOutDataToInData(std::unordered_set &anchorSet, + vector &peerOutDataAnchorVec, + ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr) { + for (auto inDataAnchor : srcNodePtr->GetAllInDataAnchors()) { + GE_IF_BOOL_EXEC(inDataAnchor == nullptr, continue;); + OutDataAnchorPtr peerOutDataAnchor = inDataAnchor->GetPeerOutAnchor(); + GE_IF_BOOL_EXEC(peerOutDataAnchor == nullptr, continue;); + if (anchorSet.count(peerOutDataAnchor.get()) == 0) { + peerOutDataAnchorVec.push_back(peerOutDataAnchor); + anchorSet.insert(peerOutDataAnchor.get()); + if (dstOpDescPtr->AddInputDesc(inDataAnchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(inDataAnchor->GetIdx())) != + ge::GRAPH_SUCCESS) { + GELOGW("GetPeerOutDataToInData: AddInputDesc failed"); + } + GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataAnchor, inDataAnchor)); + } + } + return SUCCESS; +} + +Status AllReducePass::GetPeerOutDataToInControl(std::unordered_set &anchorSet, + vector &peerOutDataToInControlVec, + ge::NodePtr &srcNodePtr) { + InControlAnchorPtr inControlAnchor = srcNodePtr->GetInControlAnchor(); + GE_CHECK_NOTNULL(inControlAnchor); + for (auto peerOutDataToInControl : inControlAnchor->GetPeerOutDataAnchors()) { + GE_IF_BOOL_EXEC(peerOutDataToInControl == nullptr, continue;); + if (anchorSet.count(peerOutDataToInControl.get()) == 0) { + peerOutDataToInControlVec.push_back(peerOutDataToInControl); + anchorSet.insert(peerOutDataToInControl.get()); + GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutDataToInControl, inControlAnchor)); + } + } + return SUCCESS; +} + +Status AllReducePass::GetPeerOutControlToInControl(std::unordered_set &anchorSet, + vector &peerOutControlToInControlVec, + ge::NodePtr &srcNodePtr) { + InControlAnchorPtr inControlAnchor = srcNodePtr->GetInControlAnchor(); + GE_CHECK_NOTNULL(inControlAnchor); + for (auto peerOutControlAnchor : inControlAnchor->GetPeerOutControlAnchors()) { + GE_IF_BOOL_EXEC(peerOutControlAnchor == nullptr, 
continue;); + if (anchorSet.count(peerOutControlAnchor.get()) == 0) { + peerOutControlToInControlVec.push_back(peerOutControlAnchor); + anchorSet.insert(peerOutControlAnchor.get()); + GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(peerOutControlAnchor, inControlAnchor)); + } + } + return SUCCESS; +} + +Status AllReducePass::GetPeerAnchorFromOutData( + std::unordered_set &anchorSet, vector> &peerInDataFromOutDataVec, + vector> &peerInControlFromOutDataVec, ge::NodePtr &srcNodePtr, + ge::OpDescPtr &dstOpDescPtr, int &index) { + for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) { + GE_IF_BOOL_EXEC(outDataAnchor == nullptr, continue;) + if (outDataAnchor->GetPeerInDataAnchors().size() > 0 || outDataAnchor->GetPeerInControlAnchors().size() > 0) { + if (dstOpDescPtr->AddOutputDesc( + outDataAnchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(outDataAnchor->GetIdx())) != ge::GRAPH_SUCCESS) { + GELOGW("GetPeerAnchorFromOutData: AddOutputDesc failed"); + } + index++; + } + + for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) { + GE_IF_BOOL_EXEC(peerInDataAnchor == nullptr, continue;) + if (anchorSet.count(peerInDataAnchor.get()) == 0) { + std::pair pairPeerInDataAnchor; + pairPeerInDataAnchor.first = index; + pairPeerInDataAnchor.second = peerInDataAnchor; + peerInDataFromOutDataVec.push_back(pairPeerInDataAnchor); + anchorSet.insert(peerInDataAnchor.get()); + GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor)) + } + } + + for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) { + GE_IF_BOOL_EXEC(peerInControlAnchorFromData == nullptr, continue;) + if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) { + std::pair pairPeerInControlAnchorFromData; + pairPeerInControlAnchorFromData.first = index; + pairPeerInControlAnchorFromData.second = peerInControlAnchorFromData; + peerInControlFromOutDataVec.push_back(pairPeerInControlAnchorFromData); + anchorSet.insert(peerInControlAnchorFromData.get()); + 
GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData)) + } + } + } + return SUCCESS; +} + +Status AllReducePass::GetPeerInControlFromOutControl(std::unordered_set &anchorSet, + vector &peerInControlFromOutControlVec, + ge::NodePtr &srcNodePtr) { + OutControlAnchorPtr outControlAnchor = srcNodePtr->GetOutControlAnchor(); + GE_CHECK_NOTNULL(outControlAnchor); + for (auto peerInControlAnchor : outControlAnchor->GetPeerInControlAnchors()) { + GE_IF_BOOL_EXEC(peerInControlAnchor == nullptr, continue;) + if (anchorSet.count(peerInControlAnchor.get()) == 0) { + peerInControlFromOutControlVec.push_back(peerInControlAnchor); + anchorSet.insert(peerInControlAnchor.get()); + GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outControlAnchor, peerInControlAnchor)) + } + } + return SUCCESS; +} +} // namespace ge diff --git a/src/ge/graph/optimize/optimizer/allreduce_fusion_pass.h b/src/ge/graph/optimize/optimizer/allreduce_fusion_pass.h new file mode 100644 index 00000000..2701ba16 --- /dev/null +++ b/src/ge/graph/optimize/optimizer/allreduce_fusion_pass.h @@ -0,0 +1,55 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ +#define GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ + +#include +#include +#include +#include "inc/graph_pass.h" + +namespace ge { +// +class AllReducePass : public GraphPass { + public: + Status Run(ge::ComputeGraphPtr graph) override; + + private: + Status GetPeerOutDataToInData(std::unordered_set &anchorSet, + vector &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr, + ge::OpDescPtr &dstOpDescPtr); + Status GetPeerOutDataToInControl(std::unordered_set &anchorSet, + vector &peerOutDataToInControlVec, ge::NodePtr &srcNodePtr); + Status GetPeerOutControlToInControl(std::unordered_set &anchorSet, + vector &peerOutControlToInControlVec, + ge::NodePtr &srcNodePtr); + Status GetPeerAnchorFromOutData(std::unordered_set &anchorSet, + vector> &peerInDataFromOutDataVec, + vector> &peerInControlFromOutDataVec, + ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr, int &index); + Status GetPeerInControlFromOutControl(std::unordered_set &anchorSet, + vector &peerInControlFromOutControlVec, + ge::NodePtr &srcNodePtr); + Status GetPeerOutDataToInData(std::unordered_set &anchorSet, + std::vector &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr); + Status GetPeerInAnchorToOutData(std::unordered_set &anchorSet, + std::vector> &fusionOpPeerInDataAnchor, + std::vector> &fusionOpPeerInControlFromOutData, + ge::NodePtr &srcNodePtr); +}; +} // namespace ge +#endif // GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ diff --git a/src/ge/graph/partition/dynamic_shape_partition.cc b/src/ge/graph/partition/dynamic_shape_partition.cc index 4ffd37bd..6a396eef 100644 --- a/src/ge/graph/partition/dynamic_shape_partition.cc +++ b/src/ge/graph/partition/dynamic_shape_partition.cc @@ -745,7 +745,8 @@ Status Cluster::BuildPartitionSubgraph() { } int64_t parent_node_index = 0; for (auto anchor : inputs_) { - auto data_op = MakeShared(std::string("Data_") + std::to_string(parent_node_index), ge::DATA); + auto data_op = + 
MakeShared(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA); REQUIRE_NOT_NULL(data_op, "Failed new memory for data op."); auto input_desc = anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(anchor->GetIdx()); REQUIRE_GRAPH_SUCCESS(data_op->AddInputDesc(input_desc), "Failed add input desc."); @@ -763,7 +764,7 @@ Status Cluster::BuildPartitionSubgraph() { if (outputs_.empty() && control_outputs_.empty()) { return SUCCESS; } - auto net_output_op = MakeShared(NODE_NAME_NET_OUTPUT, ge::NETOUTPUT); + auto net_output_op = MakeShared(subgraph_->GetName() + "_" + NODE_NAME_NET_OUTPUT, ge::NETOUTPUT); REQUIRE_NOT_NULL(net_output_op, "Failed new memory for netoutput op."); for (size_t i = 0; i < outputs_.size(); ++i) { GeTensorDesc input_desc; diff --git a/src/ge/graph/partition/graph_partition.cc b/src/ge/graph/partition/graph_partition.cc index b25de017..0dff2570 100644 --- a/src/ge/graph/partition/graph_partition.cc +++ b/src/ge/graph/partition/graph_partition.cc @@ -300,11 +300,9 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr GE_CHECK_NOTNULL(end_graph); const auto &src_node = out_anchor->GetOwnerNode(); const auto &dst_node = peer_in_anchor->GetOwnerNode(); - string engine_end_name; - string engine_pld_name; // link input -> end string end_name = kEndType + std::to_string(graph_info_.num_of_pld_end_); - auto end_op_desc = MakeShared(end_name, END); + auto end_op_desc = MakeShared(end_graph->GetName() + "_" + end_name, END); if (end_op_desc == nullptr) { GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr."); return FAILED; @@ -318,15 +316,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr bool is_need_update_desc = (output_index >= 0) && (graph_info_.mode_ == kPartitioning); if (is_need_update_desc) { if (UpdateEndOpDesc(src_node, output_index, end_op_desc) != SUCCESS) { - GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, input index %d, engine name is 
%s", output_index, - engine_end_name.c_str()); + GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, input index %d", output_index); return FAILED; } } else { GeTensorDesc input_desc; if (end_op_desc->AddInputDesc(input_desc) != SUCCESS) { - GELOGE(GRAPH_PARAM_INVALID, "AddInputDesc failed, input index %d, engine name is %s", output_index, - engine_end_name.c_str()); + GELOGE(GRAPH_PARAM_INVALID, "AddInputDesc failed, input index %d", output_index); return FAILED; } } @@ -346,11 +342,11 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr } /// For fe, op id has been set in AddNode, /// we can take op id of srcNode as the mark of parentId now - auto const &src_node_opdesc = src_node->GetOpDesc(); + const auto &src_node_opdesc = src_node->GetOpDesc(); GE_CHECK_NOTNULL(src_node_opdesc); int64_t node_id = src_node_opdesc->GetId(); const string pld_name = kPlaceHolderType + std::to_string(graph_info_.num_of_pld_end_); - auto pld_op_desc = MakeShared(pld_name, PLACEHOLDER); + auto pld_op_desc = MakeShared(pld_graph->GetName() + "_" + pld_name, PLACEHOLDER); if (pld_op_desc == nullptr) { GELOGE(GRAPH_PARAM_INVALID, "pld_op_desc is nullptr."); return FAILED; @@ -370,15 +366,13 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr is_need_update_desc = (input_index >= 0) && (graph_info_.mode_ == kPartitioning); if (is_need_update_desc) { if (UpdatePldOpDesc(dst_node, input_index, pld_op_desc) != SUCCESS) { - GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, output index %d, engine name is %s", input_index, - engine_pld_name.c_str()); + GELOGE(GRAPH_PARAM_INVALID, "UpdateEndOpDesc failed, output index %d", input_index); return FAILED; } } else { GeTensorDesc output_desc; if (pld_op_desc->AddOutputDesc(output_desc) != SUCCESS) { - GELOGE(GRAPH_PARAM_INVALID, "AddOutputDesc failed, input index %d, engine name is %s", input_index, - engine_pld_name.c_str()); + GELOGE(GRAPH_PARAM_INVALID, "AddOutputDesc failed, 
input index %d", input_index); return FAILED; } } @@ -399,8 +393,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr return FAILED; } graph_info_.index_2_end_[graph_info_.num_of_pld_end_] = new_end_node; - graph_info_.pld_2_end_[new_pld_node] = new_end_node; graph_info_.end_2_pld_[new_end_node] = new_pld_node; + graph_info_.pld_2_end_[new_pld_node] = new_end_node; return SUCCESS; } @@ -591,7 +585,8 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vectorSetOutputContext(graph_info_.output_name_); AddEndPldInformationToSubGraphInfo(sgi); GELOGI("[GraphPartitioner]: subGraph engine name is %s, graph name is %s, stream label is %s", - engine_name.c_str(), sub_graph->GetName().c_str(), sgi->GetStreamLabel().c_str()); + engine_name.c_str(), sub_graph->GetName().c_str(), + sgi->GetStreamLabel().empty() ? "null" : sgi->GetStreamLabel().c_str()); output_subgraphs.push_back(sgi); } } @@ -896,8 +891,8 @@ Status ge::GraphPartitioner::AddPlaceHolderEnd(const AnchorPtr &out_anchor, cons return FAILED; } // nodes in original graph - auto src_node = out_anchor->GetOwnerNode(); - auto dst_node = in_anchor->GetOwnerNode(); + const auto &src_node = out_anchor->GetOwnerNode(); + const auto &dst_node = in_anchor->GetOwnerNode(); if ((src_node == nullptr) || (dst_node == nullptr)) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "src_node or dst_node is null."); return FAILED; diff --git a/src/ge/graph/passes/aicpu_constant_folding_pass.cc b/src/ge/graph/passes/aicpu_constant_folding_pass.cc index 748c8d60..4157b5d6 100644 --- a/src/ge/graph/passes/aicpu_constant_folding_pass.cc +++ b/src/ge/graph/passes/aicpu_constant_folding_pass.cc @@ -323,7 +323,7 @@ Status AicpuConstantFoldingPass::LaunchSingleOpRunTask(const NodePtr &node, cons aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; - aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0; + 
aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; std::string task_info; Status ret = kernel_info->GenSingleOpRunTask(node, aicpu_task, task_info); if (ret != SUCCESS) { @@ -378,7 +378,7 @@ Status AicpuConstantFoldingPass::LaunchMemCopyTask(const vector &data_ aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = 0; aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; - aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0; + aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; std::string task_info; Status ret = kernel_info->GenMemCopyTask(data_infos.size(), aicpu_task, task_info); if (ret != SUCCESS) { diff --git a/src/ge/graph/passes/atomic_addr_clean_pass.cc b/src/ge/graph/passes/atomic_addr_clean_pass.cc index 253ab775..7d9b8dec 100644 --- a/src/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/src/ge/graph/passes/atomic_addr_clean_pass.cc @@ -172,9 +172,12 @@ NodePtr AtomicAddrCleanPass::InsertAtomicAddrCleanNode(ComputeGraphPtr &graph) { if (!session_graph_id.empty()) { (void)AttrUtils::SetStr(op_desc, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); } + // Only flush subgraph name + string node_name = (graph->GetParentGraph() != nullptr) + ? 
(graph->GetName() + "_" + op_desc->GetName() + session_graph_id) + : (op_desc->GetName() + session_graph_id); - string name = op_desc->GetName() + session_graph_id; - op_desc->SetName(name); + op_desc->SetName(node_name); GELOGI("Create cleanAddr op:%s.", op_desc->GetName().c_str()); // To avoid same name between graphs, set session graph id to this node NodePtr clean_addr_node = graph->AddNodeFront(op_desc); diff --git a/src/ge/graph/passes/cast_remove_pass.cc b/src/ge/graph/passes/cast_remove_pass.cc index 87caf6e4..d18c4b4e 100644 --- a/src/ge/graph/passes/cast_remove_pass.cc +++ b/src/ge/graph/passes/cast_remove_pass.cc @@ -34,8 +34,8 @@ Status CastRemovePass::Run(NodePtr &node) { return PARAM_INVALID; } - // begin with not trans op, and only has one out data node - if (TransOpUtil::IsTransOp(node) || node->GetOutDataNodesSize() != 1) { + // begin with not trans op, and only has one out data anchor + if (TransOpUtil::IsTransOp(node) || node->GetAllOutDataAnchorsSize() != 1) { return SUCCESS; } diff --git a/src/ge/graph/passes/cond_pass.cc b/src/ge/graph/passes/cond_pass.cc index 6f47689b..651cf98b 100644 --- a/src/ge/graph/passes/cond_pass.cc +++ b/src/ge/graph/passes/cond_pass.cc @@ -159,7 +159,7 @@ Status CondPass::GetCondInfoForWhile(const NodePtr &node, ComputeGraphPtr &graph graph = GraphUtils::FindRootGraph(node->GetOwnerComputeGraph())->GetSubgraph(cond_graph_instance_name); GE_CHECK_NOTNULL(graph); - NodePtr net_output_node = graph->FindNode(NODE_NAME_NET_OUTPUT); + NodePtr net_output_node = graph->FindFirstNodeMatchType(NETOUTPUT); GE_CHECK_NOTNULL(net_output_node); // cond_graph has and only has one output uint32_t output_num = net_output_node->GetAllInDataAnchorsSize(); diff --git a/src/ge/graph/passes/cond_remove_pass.cc b/src/ge/graph/passes/cond_remove_pass.cc index a5ba0a19..8bc34fbc 100644 --- a/src/ge/graph/passes/cond_remove_pass.cc +++ b/src/ge/graph/passes/cond_remove_pass.cc @@ -17,8 +17,8 @@ #include "graph/passes/cond_remove_pass.h" 
#include "common/op/ge_op_utils.h" #include "graph/utils/graph_utils.h" -#include "graph/utils/type_utils.h" #include "graph/utils/node_utils.h" +#include "graph/utils/type_utils.h" namespace { const uint32_t kConditionIndexNum = 1; diff --git a/src/ge/graph/passes/enter_pass.cc b/src/ge/graph/passes/enter_pass.cc index 98ca30a5..84621689 100644 --- a/src/ge/graph/passes/enter_pass.cc +++ b/src/ge/graph/passes/enter_pass.cc @@ -16,11 +16,8 @@ #include "graph/passes/enter_pass.h" -#include - #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" -#include "framework/common/ge_inner_error_codes.h" #include "graph/utils/graph_utils.h" namespace ge { diff --git a/src/ge/graph/passes/flow_ctrl_pass.cc b/src/ge/graph/passes/flow_ctrl_pass.cc index fb05ca6a..03f8d5a6 100644 --- a/src/ge/graph/passes/flow_ctrl_pass.cc +++ b/src/ge/graph/passes/flow_ctrl_pass.cc @@ -188,9 +188,9 @@ NodePtr FlowCtrlPass::AddVariableNode(ComputeGraphPtr &compute_graph, const stri } Status FlowCtrlPass::AddGlobalStepVariableNode(ComputeGraphPtr &compute_graph) { - NodePtr output_node = compute_graph->FindNode(NODE_NAME_NET_OUTPUT); + NodePtr output_node = compute_graph->FindFirstNodeMatchType(NETOUTPUT); if (output_node == nullptr) { - GELOGD("Node %s can't be found in graph %u", NODE_NAME_NET_OUTPUT.c_str(), compute_graph->GetGraphID()); + GELOGD("Node type %s can't be found in graph %u", NETOUTPUT, compute_graph->GetGraphID()); return SUCCESS; } diff --git a/src/ge/graph/passes/for_pass.cc b/src/ge/graph/passes/for_pass.cc index d9a17509..409c345f 100644 --- a/src/ge/graph/passes/for_pass.cc +++ b/src/ge/graph/passes/for_pass.cc @@ -42,7 +42,6 @@ const std::string kAbs = "Abs"; namespace ge { Status ForPass::Run(NodePtr &node) { - GE_CHECK_NOTNULL(node->GetOpDesc()); if (node->GetType() != FOR) { GELOGD("no need for_pass for node %s.", node->GetName().c_str()); return SUCCESS; @@ -78,6 +77,7 @@ Status ForPass::Run(NodePtr &node) { node->GetName().c_str()); // for 
node has and only has one subgraph + GE_CHECK_NOTNULL(node->GetOpDesc()); node->GetOpDesc()->RemoveSubgraphInstanceName(node->GetOpDesc()->GetSubgraphInstanceName(0)); GELOGI("Transfer for_op to while_op succ, node:%s.", node->GetName().c_str()); diff --git a/src/ge/graph/passes/hccl_memcpy_pass.cc b/src/ge/graph/passes/hccl_memcpy_pass.cc index 44c1b084..5325f56e 100644 --- a/src/ge/graph/passes/hccl_memcpy_pass.cc +++ b/src/ge/graph/passes/hccl_memcpy_pass.cc @@ -52,7 +52,7 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. NodePtr src_node = src_out_anchor->GetOwnerNode(); std::string src_type = src_node->GetType(); - bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA); + bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); if (check_src_type && node->GetType() == HCOMALLREDUCE) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { diff --git a/src/ge/graph/passes/iterator_op_pass.cc b/src/ge/graph/passes/iterator_op_pass.cc index 540742cf..e1d452b1 100644 --- a/src/ge/graph/passes/iterator_op_pass.cc +++ b/src/ge/graph/passes/iterator_op_pass.cc @@ -72,9 +72,19 @@ Status IteratorOpPass::Run(ge::ComputeGraphPtr graph) { VarManager::Instance(graph->GetSessionID())->GetCurVarDesc(NODE_NAME_FLOWCTRL_LOOP_PER_ITER, ge_tensor_desc); GE_IF_BOOL_EXEC(status != SUCCESS, GELOGW("Fail to Get var_desc of NODE_NAME_FLOWCTRL_LOOP_PER_ITER failed."); continue); + Status ret; + ret = SetRtContext(rtContext_t(), RT_CTX_NORMAL_MODE); + + // EOS will not be considered if ret is not SUCCESS. 
+ GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGW("Set rt context RT_CTX_GEN_MODE failed."); continue); status = GetVariableValue(graph->GetSessionID(), ge_tensor_desc, NODE_NAME_FLOWCTRL_LOOP_PER_ITER, &loop_per_iter); + ret = SetRtContext(rtContext_t(), RT_CTX_GEN_MODE); + + // The following process will be affected if ret is not SUCCESS. + GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Set rt context RT_CTX_GEN_MODE failed."); return ret); + GE_IF_BOOL_EXEC(status != SUCCESS, GELOGW("Get variable value of NODE_NAME_FLOWCTRL_LOOP_PER_ITER failed."); continue); GELOGI("The value of NODE_NAME_FLOWCTRL_LOOP_PER_ITER is %ld", loop_per_iter); @@ -107,18 +117,8 @@ Status IteratorOpPass::GetVariableValue(uint64_t session_id, const ge::GeTensorD auto logic_var_base = VarManager::Instance(session_id)->GetVarMemLogicBase(); // devcice_addr uint8_t *variable_addr = static_cast(var_mem_base + offset - logic_var_base); - Status ret; - ret = SetRtContext(rtContext_t(), RT_CTX_NORMAL_MODE); - if (ret != SUCCESS) { - GELOGE(ret, "Set rt context RT_CTX_NORMAL_MODE failed."); - return ret; - } + GE_CHK_RT_RET(rtMemcpy(dest, sizeof(int64_t), variable_addr, sizeof(int64_t), RT_MEMCPY_DEVICE_TO_HOST)); - ret = SetRtContext(rtContext_t(), RT_CTX_GEN_MODE); - if (ret != SUCCESS) { - GELOGE(ret, "Set rt context RT_CTX_GEN_MODE failed."); - return ret; - } return SUCCESS; } diff --git a/src/ge/graph/passes/mark_agnostic_pass.cc b/src/ge/graph/passes/mark_agnostic_pass.cc new file mode 100644 index 00000000..6f520dd8 --- /dev/null +++ b/src/ge/graph/passes/mark_agnostic_pass.cc @@ -0,0 +1,40 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/mark_agnostic_pass.h" + +#include "utils/node_utils.h" + +namespace ge { +Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { + for (const auto &node : graph->GetDirectNode()) { + auto node_type = NodeUtils::GetNodeType(*node); + if (node_type == SWITCH || node_type == REFSWITCH || node_type == SWITCHN) { + GELOGD("Mark format agnostic for switch ndoe %s", node->GetName().c_str()); + AttrUtils::SetInt(node->GetOpDesc(), "_format_agnostic", 1); + AttrUtils::SetListInt(node->GetOpDesc(), "_format_agnostic_except_input", std::vector({1})); + continue; + } + if (node_type == MERGE || node_type == REFMERGE) { + GELOGD("Mark format agnostic for merge node %s", node->GetName().c_str()); + AttrUtils::SetInt(node->GetOpDesc(), "_format_agnostic", 1); + AttrUtils::SetListInt(node->GetOpDesc(), "_format_agnostic_except_output", std::vector({1})); + continue; + } + } + return SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/src/ge/graph/passes/mark_agnostic_pass.h b/src/ge/graph/passes/mark_agnostic_pass.h new file mode 100644 index 00000000..7fd3189d --- /dev/null +++ b/src/ge/graph/passes/mark_agnostic_pass.h @@ -0,0 +1,29 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_MARK_AGNOSTIC_PASS_H_ +#define GE_MARK_AGNOSTIC_PASS_H_ + +#include "inc/graph_pass.h" + +namespace ge { +class MarkAgnosticPass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph) override; +}; +} // namespace ge + +#endif // GE_MARK_AGNOSTIC_PASS_H_ diff --git a/src/ge/graph/passes/net_output_pass.cc b/src/ge/graph/passes/net_output_pass.cc index 4eed597b..3c83d8ac 100644 --- a/src/ge/graph/passes/net_output_pass.cc +++ b/src/ge/graph/passes/net_output_pass.cc @@ -429,13 +429,28 @@ Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraph } return SUCCESS; } + +Status NetOutputPass::CreateNetOutputNode(OpDescPtr &net_output_desc, ge::ComputeGraphPtr &graph) { + // Only flush subgraph name + string node_name = + (graph->GetParentGraph() != nullptr) ? 
(graph->GetName() + "_" + NODE_NAME_NET_OUTPUT) : NODE_NAME_NET_OUTPUT; + net_output_desc = MakeShared(node_name, NETOUTPUT); + if (net_output_desc == nullptr) { + GELOGE(MEMALLOC_FAILED, "Make shared net output op failed."); + return MEMALLOC_FAILED; + } + (void)AttrUtils::SetListStr(net_output_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, + std::move(std::vector())); + return SUCCESS; +} + Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { if (graph == nullptr) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); return GE_GRAPH_PARAM_NULLPTR; } GELOGI("NetOutputPass Run."); - NodePtr output_node = graph->FindNode(NODE_NAME_NET_OUTPUT); + NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT); OpDescPtr net_output_desc = nullptr; std::vector output_nodes_info; @@ -447,13 +462,10 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { std::move(std::vector())); return ProcessWithNetoutput(graph, output_node); } else { - net_output_desc = MakeShared(NODE_NAME_NET_OUTPUT, NETOUTPUT); - if (net_output_desc == nullptr) { - GELOGE(MEMALLOC_FAILED, "Make shared net output op failed."); - return MEMALLOC_FAILED; + if (CreateNetOutputNode(net_output_desc, graph) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Get net output nodes failed."); + return INTERNAL_ERROR; } - (void)AttrUtils::SetListStr(net_output_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, - std::move(std::vector())); Status ret = GetOutputNode(graph, output_nodes_info); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Get net output nodes failed."); diff --git a/src/ge/graph/passes/net_output_pass.h b/src/ge/graph/passes/net_output_pass.h index 6a022d79..5edf24fc 100644 --- a/src/ge/graph/passes/net_output_pass.h +++ b/src/ge/graph/passes/net_output_pass.h @@ -65,6 +65,16 @@ class NetOutputPass : public GraphPass { /// Status GetOutputNode(const ge::ComputeGraphPtr &graph, std::vector &output_nodes_info); + /// + /// Get the output node of the graph + /// @param [in] graph: Input ComputeGraph + /// 
@param [in/out] net_output_desc: output netoutput node pair + /// @return SUCCESS: Execution succeed + /// @return OTHERS: Execution failed + /// @author + /// + Status CreateNetOutputNode(OpDescPtr &net_output_desc, ge::ComputeGraphPtr &graph); + /// /// Check if the network output node is legal /// @param [in] graph: Input ComputeGraph diff --git a/src/ge/graph/passes/no_use_reshape_remove_pass.cc b/src/ge/graph/passes/no_use_reshape_remove_pass.cc index 1e78cc40..07f58417 100644 --- a/src/ge/graph/passes/no_use_reshape_remove_pass.cc +++ b/src/ge/graph/passes/no_use_reshape_remove_pass.cc @@ -61,7 +61,7 @@ Status NoUseReshapeRemovePass::Run(ge::NodePtr &node) { std::vector output_4dims = output_desc->GetShape().GetDims(); if (input_desc->GetShape().IsUnknownShape() || output_desc->GetShape().IsUnknownShape()) { - GELOGI("Current Reshape %s is unkown shape which should be kept.", op_desc_ptr->GetName().c_str()); + GELOGI("Current Reshape %s is unknown shape which should be kept.", op_desc_ptr->GetName().c_str()); return SUCCESS; } diff --git a/src/ge/graph/passes/permute_pass.cc b/src/ge/graph/passes/permute_pass.cc index 1b04b3fa..f5fd9dc5 100644 --- a/src/ge/graph/passes/permute_pass.cc +++ b/src/ge/graph/passes/permute_pass.cc @@ -27,9 +27,9 @@ using domi::DOMI_TENSOR_ND; using domi::DOMI_TENSOR_NHWC; -using domi::FMK_TYPE_T; using domi::GetContext; using domi::SUCCESS; +using domi::TENSORFLOW; namespace ge { Status PermutePass::Run(ComputeGraphPtr graph) { @@ -40,7 +40,7 @@ Status PermutePass::Run(ComputeGraphPtr graph) { OpDescPtr op_desc_ptr = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc_ptr); GE_IF_BOOL_EXEC( - op_desc_ptr->GetType() == PERMUTE && GetContext().type == domi::FMK_TYPE_T, + op_desc_ptr->GetType() == PERMUTE && GetContext().type == domi::TENSORFLOW, /// Input format 5D means NHWC in 4D way. So if input origin foramt is NCHW and /// permute paramter list is [0,3,1,2], this permute can be optimised. 
GE_IF_BOOL_EXEC( diff --git a/src/ge/graph/passes/replace_with_empty_const_pass.cc b/src/ge/graph/passes/replace_with_empty_const_pass.cc index b6d680f7..cb35238b 100644 --- a/src/ge/graph/passes/replace_with_empty_const_pass.cc +++ b/src/ge/graph/passes/replace_with_empty_const_pass.cc @@ -33,6 +33,10 @@ Status ReplaceWithEmptyConstPass::Run(NodePtr &node) { GELOGE(PARAM_INVALID, "Param [opDesc] must not be null."); return PARAM_INVALID; } + if (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP) { + GELOGI("Node %s is const. Ignore current pass.", node->GetName().c_str()); + return SUCCESS; + } // Node like no op, it has no output if (node->GetOpDesc()->GetAllOutputsDescPtr().empty()) { GELOGI("Node %s has no output desc. Ignore current pass.", node->GetName().c_str()); @@ -105,13 +109,7 @@ Status ReplaceWithEmptyConstPass::ReplaceWithEmptyConst(NodePtr &node_to_replace GELOGI("Node %s has been replaced by empty const %s.", node_to_replace->GetName().c_str(), const_node->GetName().c_str()); } - // Unlink control edge from node_to_replace to graph - if (node_to_replace->GetInControlAnchor() != nullptr) { - node_to_replace->GetInControlAnchor()->UnlinkAll(); - } - if (node_to_replace->GetOutControlAnchor() != nullptr) { - node_to_replace->GetOutControlAnchor()->UnlinkAll(); - } + IsolateAndDeleteNode(node_to_replace, {}); return SUCCESS; } Status ReplaceWithEmptyConstPass::InsertEmptyConst(const GeTensorDesc &out_desc, NodePtr &const_node, diff --git a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc index 5f861660..a1f8b14a 100644 --- a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc +++ b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc @@ -504,8 +504,13 @@ graphStatus SameTransdataBreadthFusionPass::ReuseNodesBeforeTransdata(int anchor GE_CHECK_NOTNULL(transdata_out_anchor); GELOGI("remove edge.src:%s, dst:%s", head_node_anchor->GetOwnerNode()->GetName().c_str(), 
head_node_peer_anchor->GetOwnerNode()->GetName().c_str()); - if (GraphUtils::RemoveEdge(head_node_anchor, head_node_peer_anchor) != GRAPH_SUCCESS) { - GELOGW("remove edge failed!src:%s, dst:%s", head_node_anchor->GetOwnerNode()->GetName().c_str(), + if (head_node_anchor->IsLinkedWith(head_node_peer_anchor)) { + if (GraphUtils::RemoveEdge(head_node_anchor, head_node_peer_anchor) != GRAPH_SUCCESS) { + GELOGW("remove edge failed!src:%s, dst:%s", head_node_anchor->GetOwnerNode()->GetName().c_str(), + head_node_peer_anchor->GetOwnerNode()->GetName().c_str()); + } + } else { + GELOGW("edge not link now. src:%s, dst:%s", head_node_anchor->GetOwnerNode()->GetName().c_str(), head_node_peer_anchor->GetOwnerNode()->GetName().c_str()); } diff --git a/src/ge/graph/passes/subgraph_pass.cc b/src/ge/graph/passes/subgraph_pass.cc index b677179e..d759aa12 100644 --- a/src/ge/graph/passes/subgraph_pass.cc +++ b/src/ge/graph/passes/subgraph_pass.cc @@ -104,7 +104,6 @@ Status SubgraphPass::SubgraphInputNode(const ComputeGraphPtr &graph, const NodeP // Subgraph Data Node, check for constant input. 
std::string const_type; if (!NodeUtils::GetConstOpType(in_node, const_type)) { - GELOGE(FAILED, "Get const_op_type failed, node:%s.", in_node->GetName().c_str()); return SUCCESS; } @@ -213,7 +212,7 @@ Status SubgraphPass::WhileBodySubgraph(const ComputeGraphPtr &graph, const NodeP return FAILED; } - NodePtr output_node = while_body->FindNode(NODE_NAME_NET_OUTPUT); + NodePtr output_node = while_body->FindFirstNodeMatchType(NETOUTPUT); if (output_node == nullptr) { GELOGE(FAILED, "net_output_node not exist in graph %s.", while_body->GetName().c_str()); return FAILED; @@ -297,11 +296,7 @@ void SubgraphPass::MarkOutputIndex(const OutDataAnchorPtr &peer_out_anchor, uint if (visited_nodes.count(cur_node) > 0) { continue; } - if (node_to_attr_index.count(cur_node) > 0) { - node_to_attr_index[cur_node].emplace_back(index); - } else { - node_to_attr_index[cur_node] = {index}; - } + node_to_attr_index[cur_node].emplace_back(index); for (const NodePtr &in_node : cur_node->GetInDataNodes()) { nodes.emplace(in_node); } diff --git a/src/ge/graph/passes/switch_data_edges_bypass.cc b/src/ge/graph/passes/switch_data_edges_bypass.cc new file mode 100644 index 00000000..059ad772 --- /dev/null +++ b/src/ge/graph/passes/switch_data_edges_bypass.cc @@ -0,0 +1,221 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "switch_data_edges_bypass.h" + +#include "common/debug/log.h" +#include "common/ge/ge_util.h" +#include "common/op/ge_op_utils.h" +#include "common/util.h" +#include "graph/utils/node_utils.h" + +namespace ge { +namespace { +bool IsSwitchInWhileLoop(const NodePtr &node) { + auto pred_anchor = node->GetInDataAnchor(SWITCH_PRED_INPUT); + if (pred_anchor == nullptr) { + GELOGW("The switch node %s does not have a pred in anchor, the node may be invalid", node->GetName().c_str()); + return true; + } + auto pred_node_anchor = pred_anchor->GetPeerOutAnchor(); + if (pred_node_anchor == nullptr) { + GELOGW("The switch node %s does not have a pred in node, the graph may be invalid", node->GetName().c_str()); + return true; + } + auto pred_node = pred_node_anchor->GetOwnerNode(); + if (pred_node == nullptr) { + GELOGW("The switch node %s does not have a pred in node, the pred-anchor may be invalid", node->GetName().c_str()); + return true; + } + if (pred_node->GetType() == LOOPCOND) { + GELOGD("The switch node %s is in a while loop, skip the bypass process", node->GetName().c_str()); + return true; + } + return false; +} +std::vector> GetOutDataNodesByIndex(const NodePtr &node, int index) { + auto out_anchor = node->GetOutDataAnchor(index); + if (out_anchor == nullptr) { + GELOGE(PARAM_INVALID, "Failed to get out data nodes of index %d from node %s, the anchor does not exists", index, + node->GetName().c_str()); + return {}; + } + std::vector> nodes_and_anchors; + for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { + auto out_node = in_anchor->GetOwnerNode(); + if (out_node != nullptr) { + nodes_and_anchors.emplace_back(out_node, in_anchor); + } + } + return nodes_and_anchors; +} +std::pair GetInDataNodeByIndex(const NodePtr &node, int index) { + auto in_anchor = node->GetInDataAnchor(index); + if (in_anchor == nullptr) { + GELOGD("Failed to get in data node of index %d from node %s, the anchor does not exists", index, + 
node->GetName().c_str()); + return {}; + } + auto out_anchor = in_anchor->GetPeerOutAnchor(); + if (out_anchor == nullptr) { + GELOGD("Failed to get in data node of index %d from node %s, the data input does not exists", index, + node->GetName().c_str()); + return {}; + } + return {out_anchor->GetOwnerNode(), out_anchor}; +} +NodePtr AddIdentityAfterNode(const NodePtr &node, int index) { + static int identity_counter = 0; + + auto node_desc = node->GetOpDesc(); + if (node_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "Failed to add identity after node %s index %d, the op desc is null", + node->GetName().c_str(), index); + return nullptr; + } + auto tensor = node_desc->GetOutputDescPtr(index); + if (tensor == nullptr) { + GELOGE(INTERNAL_ERROR, "Failed to find the tensor by index %d from node %s, can not add the identity node", index, + node->GetName().c_str()); + return nullptr; + } + auto anchor = node->GetOutDataAnchor(index); + if (anchor == nullptr) { + GELOGE(OUT_OF_MEMORY, "Failed to add identity after node %s index %d, the out anchor does not exists", + node->GetName().c_str(), index); + return nullptr; + } + + auto identity_opdesc = + MakeShared("SwitchDataEdgesByPass_Identity_" + std::to_string(identity_counter++), IDENTITY); + if (identity_opdesc == nullptr) { + GELOGE(OUT_OF_MEMORY, "Failed to add identity after node %s index %d", node->GetName().c_str(), index); + return nullptr; + } + auto ret1 = identity_opdesc->AddInputDesc("x", *tensor); + auto ret2 = identity_opdesc->AddOutputDesc("y", *tensor); + auto identity = node->GetOwnerComputeGraph()->AddNode(identity_opdesc); + if (ret1 != GRAPH_SUCCESS || ret2 != GRAPH_SUCCESS || identity == nullptr) { + GELOGE(OUT_OF_MEMORY, "Failed to add identity after node %s index %d", node->GetName().c_str(), index); + return nullptr; + } + (void)anchor->LinkTo(identity->GetInDataAnchor(0)); + + return identity; +} +NodePtr AddMemcpyBeforeNode(const NodePtr &node, int index) { + static int counter = 0; + + auto 
node_desc = node->GetOpDesc(); + if (node_desc == nullptr) { + GELOGE(INTERNAL_ERROR, "Failed to add memcpy before node %s index %d, null op desc", node->GetName().c_str(), + index); + return nullptr; + } + auto tensor = node_desc->GetInputDescPtr(index); + if (tensor == nullptr) { + GELOGE(INTERNAL_ERROR, "Failed to find the tensor by index %d from node %s, can not add the memcpy node", index, + node->GetName().c_str()); + return nullptr; + } + auto anchor = node->GetInDataAnchor(index); + if (anchor == nullptr) { + GELOGE(INTERNAL_ERROR, "Failed to add memcpy before node %s index %d, the in anchor does not exists", + node->GetName().c_str(), index); + return nullptr; + } + + auto memcpy_opdesc = MakeShared("SwitchDataEdgesByPass_Memcpy_" + std::to_string(counter++), MEMCPYASYNC); + if (memcpy_opdesc == nullptr) { + GELOGE(OUT_OF_MEMORY, "Failed to add memcpy before node %s index %d", node->GetName().c_str(), index); + return nullptr; + } + auto ret1 = memcpy_opdesc->AddInputDesc(*tensor); + auto ret2 = memcpy_opdesc->AddOutputDesc(*tensor); + auto memcpy_node = node->GetOwnerComputeGraph()->AddNode(memcpy_opdesc); + if (ret1 != GRAPH_SUCCESS || ret2 != GRAPH_SUCCESS || memcpy_node == nullptr) { + GELOGE(OUT_OF_MEMORY, "Failed to add memcpy before node %s index %d", node->GetName().c_str(), index); + return nullptr; + } + (void)memcpy_node->GetOutDataAnchor(0)->LinkTo(anchor); + + return memcpy_node; +} +Status BypassSwitchOut(const NodePtr &switch_node, int out_index) { + auto nodes_and_anchors = GetOutDataNodesByIndex(switch_node, out_index); + if (nodes_and_anchors.empty()) { + GELOGD("The switch node %s does not has out branch %d, skip the bypass process", switch_node->GetName().c_str(), + out_index); + return SUCCESS; + } + + auto data_node_and_anchor = GetInDataNodeByIndex(switch_node, SWITCH_DATA_INPUT); + if (data_node_and_anchor.first == nullptr) { + GELOGW("Can not bypass switch node %s, the node does not has a data input", 
switch_node->GetName().c_str()); + return SUCCESS; + } + + auto identity = AddIdentityAfterNode(switch_node, out_index); + GE_CHECK_NOTNULL(identity); + + std::set connected_nodes; + for (const auto &node_and_anchor : nodes_and_anchors) { + auto head_anchor = node_and_anchor.second; + head_anchor->UnlinkAll(); + + auto head_node = node_and_anchor.first; + auto head_node_type = NodeUtils::GetNodeType(*head_node); + if (head_node_type == MERGE || head_node_type == REFMERGE) { + // if the switch connect to the merge directly, insert memcpy before merge + auto memcpy_node = AddMemcpyBeforeNode(head_node, head_anchor->GetIdx()); + GE_CHECK_NOTNULL(memcpy_node); + GELOGD("Add memcpy %s before merge node %s", memcpy_node->GetName().c_str(), head_node->GetName().c_str()); + head_node = memcpy_node; + head_anchor = memcpy_node->GetInDataAnchor(0); + } + (void)data_node_and_anchor.second->LinkTo(head_anchor); + if (connected_nodes.insert(head_node.get()).second) { + (void)identity->GetOutControlAnchor()->LinkTo(head_node->GetInControlAnchor()); + } + } + GELOGI("Bypass switch %s out index %d success", switch_node->GetName().c_str(), out_index); + return SUCCESS; +} +} // namespace +Status SwitchDataEdgesBypass::Run(ComputeGraphPtr graph) { + for (const auto &node : graph->GetDirectNode()) { + auto ret = BypassSwitch(node); + GE_CHK_STATUS_RET(ret, "By pass switch node %s failed", node->GetName().c_str()) + } + return SUCCESS; +} +Status SwitchDataEdgesBypass::BypassSwitch(const NodePtr &node) { + auto node_type = NodeUtils::GetNodeType(*node); + if ((node_type != SWITCH) && (node_type != REFSWITCH)) { + return SUCCESS; + } + if (IsSwitchInWhileLoop(node)) { + return SUCCESS; + } + + auto ret = BypassSwitchOut(node, SWITCH_FALSE_OUTPUT); + GE_CHK_STATUS_RET(ret, "By pass switch node %s false output failed", node->GetName().c_str()) + ret = BypassSwitchOut(node, SWITCH_TRUE_OUTPUT); + GE_CHK_STATUS_RET(ret, "By pass switch node %s true output failed", node->GetName().c_str()) 
+ + return SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/src/ge/graph/passes/switch_data_edges_bypass.h b/src/ge/graph/passes/switch_data_edges_bypass.h new file mode 100644 index 00000000..8c2f492a --- /dev/null +++ b/src/ge/graph/passes/switch_data_edges_bypass.h @@ -0,0 +1,32 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_SWITCH_DATA_EDGES_BYPASS_H_ +#define GE_SWITCH_DATA_EDGES_BYPASS_H_ + +#include "inc/graph_pass.h" + +namespace ge { +class SwitchDataEdgesBypass : public GraphPass { + public: + Status Run(ComputeGraphPtr graph) override; + + private: + Status BypassSwitch(const NodePtr &node); +}; +} // namespace ge + +#endif // GE_SWITCH_DATA_EDGES_BYPASS_H_ \ No newline at end of file diff --git a/src/ge/graph/passes/switch_dead_branch_elimination.cc b/src/ge/graph/passes/switch_dead_branch_elimination.cc index c4ae4647..f398d8df 100644 --- a/src/ge/graph/passes/switch_dead_branch_elimination.cc +++ b/src/ge/graph/passes/switch_dead_branch_elimination.cc @@ -18,9 +18,9 @@ #include #include -#include "framework/common/debug/ge_log.h" #include "common/ge_inner_error_codes.h" #include "common/types.h" +#include "framework/common/debug/ge_log.h" #include "graph/common/omg_util.h" #include "graph/passes/pass_utils.h" #include "graph/utils/graph_utils.h" diff --git a/src/ge/graph/passes/switch_op_pass.cc b/src/ge/graph/passes/switch_op_pass.cc index 
b501804f..ed3e9b36 100644 --- a/src/ge/graph/passes/switch_op_pass.cc +++ b/src/ge/graph/passes/switch_op_pass.cc @@ -23,11 +23,11 @@ #include #include #include "common/ge/ge_util.h" -#include "ge/ge_api_types.h" #include "framework/common/debug/ge_log.h" #include "framework/common/debug/log.h" #include "framework/common/ge_inner_error_codes.h" #include "framework/common/types.h" +#include "ge/ge_api_types.h" #include "graph/common/omg_util.h" #include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" @@ -61,8 +61,7 @@ Status SwitchOpPass::Run(ComputeGraphPtr graph) { for (auto &node : stream_switch_nodes_) { for (auto &out_ctrl_node : node->GetOutControlNodes()) { - GELOGD("branch_head_nodes_ insert %s", out_ctrl_node->GetName().c_str()); - (void)branch_head_nodes_.insert(out_ctrl_node); + MarkHeadNodes(out_ctrl_node, node); } } @@ -544,7 +543,7 @@ Status SwitchOpPass::FindSwitchCondInput(bool pass_switch_flag, OutDataAnchorPtr return SUCCESS; } -int SwitchOpPass::GetGroupId(const NodePtr &node) { +int64_t SwitchOpPass::GetGroupId(const NodePtr &node) { string tailing_optimization_option; bool is_tailing_optimization = false; auto ret = GetContext().GetOption(OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION, tailing_optimization_option); @@ -566,11 +565,11 @@ int SwitchOpPass::GetGroupId(const NodePtr &node) { auto key_num = hccl_group_id.substr(key_index + 1, hccl_group_id.length() - key_index); GELOGI("Node is %s,Hccl group id is %s, key_num is %s", node->GetName().c_str(), hccl_group_id.c_str(), key_num.c_str()); - int num = atoi(key_num.c_str()); + int64_t num = atoi(key_num.c_str()); if (num == 0) { return 0; } - GELOGI("Hccl group id is %s, group id is %d", hccl_group_id.c_str(), num); + GELOGI("Hccl group id is %s, group id is %ld", hccl_group_id.c_str(), num); return num; } @@ -586,7 +585,7 @@ Status SwitchOpPass::MarkBranchs(OutDataAnchorPtr &peer_cond_anchor, NodePtr &st GE_CHECK_NOTNULL(stream_switch); auto it = cond_node_map_.find(peer_cond_anchor); 
if (it != cond_node_map_.end()) { - int switch_group_id = GetGroupId(stream_switch); + int64_t switch_group_id = GetGroupId(stream_switch); auto switch_group_it = it->second.find(switch_group_id); if (switch_group_it == it->second.end()) { std::list false_node_list; @@ -605,7 +604,7 @@ Status SwitchOpPass::MarkBranchs(OutDataAnchorPtr &peer_cond_anchor, NodePtr &st switch_group_it->second[index].emplace_back(stream_switch); } } else { - int switch_group_id = GetGroupId(stream_switch); + int64_t switch_group_id = GetGroupId(stream_switch); map>> switch_group_map; std::list false_node_list; std::list true_node_list; @@ -741,7 +740,7 @@ Status SwitchOpPass::UpdateCondBranch(NodePtr &node) { for (auto &out_node : cur_node->GetOutAllNodes()) { const std::string out_type = out_node->GetType(); bool stop_flag = (end_type_set.count(out_type) > 0) || - ((type != STREAMSWITCH) && (branch_head_nodes_.count(out_node) > 0)) || + ((branch_head_nodes_.count(out_node) > 0) && (branch_head_nodes_[out_node] != node)) || (((type == ENTER) || (type == REFENTER)) && (out_type != STREAMACTIVE)); if (!stop_flag) { nodes.push(out_node); @@ -1179,6 +1178,34 @@ void SwitchOpPass::ReplaceControlEdges(NodePtr &old_node, NodePtr &new_node) { CopyControlEdges(old_node, new_node); RemoveControlEdges(old_node); } + +/// +/// @brief Mark node as head_node of stream_switch +/// @param [in] node +/// @param [in] stream_switch +/// @return void +/// +void SwitchOpPass::MarkHeadNodes(const NodePtr &node, const NodePtr &stream_switch) { + std::stack nodes; + nodes.push(node); + std::set visited; + while (!nodes.empty()) { + NodePtr cur_node = nodes.top(); + nodes.pop(); + if (visited.count(cur_node) > 0) { + continue; + } + GELOGD("branch_head_node %s of stream_switch %s", cur_node->GetName().c_str(), stream_switch->GetName().c_str()); + branch_head_nodes_[cur_node] = stream_switch; + if ((cur_node->GetType() == IDENTITY) || (cur_node->GetType() == IDENTITYN)) { + for (auto &out_node : 
cur_node->GetOutAllNodes()) { + nodes.push(out_node); + } + } + visited.insert(cur_node); + } +} + /// /// @brief Clear Status, uesd for subgraph pass /// @return diff --git a/src/ge/graph/passes/switch_op_pass.h b/src/ge/graph/passes/switch_op_pass.h index 704adcc1..202b919c 100644 --- a/src/ge/graph/passes/switch_op_pass.h +++ b/src/ge/graph/passes/switch_op_pass.h @@ -147,7 +147,9 @@ class SwitchOpPass : public GraphPass { void ReplaceControlEdges(NodePtr &old_node, NodePtr &new_node); - int GetGroupId(const NodePtr &node); + int64_t GetGroupId(const NodePtr &node); + + void MarkHeadNodes(const NodePtr &node, const NodePtr &stream_switch); std::vector switch_nodes_; std::vector merge_nodes_; @@ -155,7 +157,7 @@ class SwitchOpPass : public GraphPass { std::unordered_map> switch_cyclic_map_; std::set bypass_nodes_; - std::set branch_head_nodes_; + std::unordered_map branch_head_nodes_; std::vector stream_switch_nodes_; std::vector need_label_nodes_; std::unordered_map>>> cond_node_map_; diff --git a/src/ge/graph/passes/transop_breadth_fusion_pass.cc b/src/ge/graph/passes/transop_breadth_fusion_pass.cc index b2f66bfc..53f9e825 100644 --- a/src/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/src/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -78,6 +78,14 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No GELOGD("Get stream label %s for node %s, add it to fusion id", stream_label.c_str(), node->GetName().c_str()); id << '-' << stream_label; } + for (const auto &in_ctrl_node : node->GetInControlNodes()) { + // c + // switch-->Identity ---> node + // the control edge from a identity node can not be removed + if (in_ctrl_node->GetType() == IDENTITY) { + id << "-control-in-" << in_ctrl_node->GetName(); + } + } // [Cascade pointer] const auto &input_desc = node->GetOpDesc()->MutableInputDesc(0); const auto &output_desc = node->GetOpDesc()->MutableOutputDesc(0); diff --git 
a/src/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc b/src/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc index 4b08e956..53c9deca 100644 --- a/src/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc +++ b/src/ge/graph/passes/transop_nearby_allreduce_fusion_pass.cc @@ -28,7 +28,7 @@ Status TransOpNearbyAllreduceFusionPass::Run(NodePtr &node) { return SUCCESS; } - if (node->GetType() == HCOMALLREDUCE) { + if (node->GetType() == HCOMALLREDUCE || node->GetType() == HVDCALLBACKALLREDUCE) { GELOGI("found allreduce op %s", node->GetName().c_str()); Status ret = RemoveNearbyPairedTransOps(node); if (ret != SUCCESS) { diff --git a/src/ge/graph/passes/transop_symmetry_elimination_pass.cc b/src/ge/graph/passes/transop_symmetry_elimination_pass.cc index 0f8f30bf..38b6684b 100644 --- a/src/ge/graph/passes/transop_symmetry_elimination_pass.cc +++ b/src/ge/graph/passes/transop_symmetry_elimination_pass.cc @@ -90,7 +90,8 @@ bool TransOpSymmetryEliminationPass::DescAreSymmetry(const NodePtr &src_node, co const auto &dst_output_shape = dst_output_desc->GetShape().GetDims(); if (src_node->GetType() == CAST && dst_node->GetType() == CAST) { - bool is_format_symmetry = (src_input_format == dst_output_format) || (dst_output_format == FORMAT_ND); + bool is_format_symmetry = + (src_input_format == dst_output_format) || (dst_output_format == FORMAT_ND) || (src_input_format == FORMAT_ND); return (src_input_dtype == dst_output_dtype) && is_format_symmetry; } else { return (src_input_dtype == dst_output_dtype) && (src_input_shape == dst_output_shape) && diff --git a/src/ge/graph/passes/unused_op_remove_pass.cc b/src/ge/graph/passes/unused_op_remove_pass.cc index 093d931a..45bbc291 100644 --- a/src/ge/graph/passes/unused_op_remove_pass.cc +++ b/src/ge/graph/passes/unused_op_remove_pass.cc @@ -39,7 +39,7 @@ Status UnusedOpRemovePass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); std::set remove_op_set; vector nodes_to_be_deleted; - if (fmktype_ == FMK_TYPE_T) { + 
if (fmktype_ == TENSORFLOW) { remove_op_set = kRemoveOpSet; } else { remove_op_set = kOtherRemoveOpSet; diff --git a/src/ge/graph/preprocess/graph_preprocess.cc b/src/ge/graph/preprocess/graph_preprocess.cc index f17d0395..68382f52 100644 --- a/src/ge/graph/preprocess/graph_preprocess.cc +++ b/src/ge/graph/preprocess/graph_preprocess.cc @@ -19,13 +19,16 @@ #include #include #include +#include "common/formats/format_transfers/format_transfer_fractal_nz.h" +#include "common/formats/format_transfers/format_transfer_fractal_z.h" #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_transpose.h" #include "common/helper/model_helper.h" #include "common/math/math_util.h" -#include "common/util/error_manager/error_manager.h" #include "common/op/ge_op_utils.h" +#include "common/util/error_manager/error_manager.h" +#include "common/formats/utils/formats_trans_utils.h" #include "framework/common/debug/ge_log.h" #include "graph/common/ge_call_wrapper.h" #include "graph/common/transop_util.h" @@ -40,6 +43,7 @@ #include "graph/passes/base_pass.h" #include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/cond_pass.h" +#include "graph/passes/cond_remove_pass.h" #include "graph/passes/constant_folding_pass.h" #include "graph/passes/constant_fuse_same_pass.h" #include "graph/passes/control_trigger_pass.h" @@ -74,6 +78,7 @@ #include "graph/passes/snapshot_pass.h" #include "graph/passes/stop_gradient_pass.h" #include "graph/passes/subgraph_pass.h" +#include "graph/passes/switch_data_edges_bypass.h" #include "graph/passes/switch_dead_branch_elimination.h" #include "graph/passes/switch_fusion_pass.h" #include "graph/passes/switch_logic_remove_pass.h" @@ -83,11 +88,6 @@ #include "graph/passes/unused_op_remove_pass.h" #include "graph/passes/var_is_initialized_op_pass.h" #include 
"graph/passes/variable_prepare_op_pass.h" -#include "graph/passes/common_subexpression_elimination_pass.h" -#include "graph/passes/replace_with_empty_const_pass.h" -#include "graph/passes/subgraph_pass.h" -#include "graph/passes/replace_transshape_pass.h" -#include "graph/passes/cond_remove_pass.h" #include "graph/preprocess/insert_op/util_insert_aipp_op.h" #include "graph/types.h" #include "graph/utils/tensor_utils.h" @@ -123,6 +123,9 @@ static std::map output_type_str_to_datatype = { {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}}; const char *const kMbatchSwitchnName = "mbatch-switch-name"; +const int64_t kGemmNdShapeSize = 2; +const int64_t kGemmAlignSize32 = 32; +const int64_t kGemmAlignSize16 = 16; OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) { GeTensorPtr tensor = MakeShared(); @@ -1132,11 +1135,114 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No return SUCCESS; } -Status ProcessDataNodeDynShape(NodePtr &node_ptr) { +Status ProcessGemmFractalZ(GeShape &src_shape, std::vector &dst_shape_vec) { + dst_shape_vec.clear(); + if (src_shape.GetDims().size() != kGemmNdShapeSize) { + GELOGE(INTERNAL_ERROR, "gemm shape size must be 2"); + return FAILED; + } + dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(0), kGemmAlignSize32)); + dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(1), kGemmAlignSize16)); + dst_shape_vec.push_back(kGemmAlignSize16); + dst_shape_vec.push_back(kGemmAlignSize32); + return SUCCESS; +} +Status SetInOutForGemm(GeTensorDescPtr &input, GeTensorDescPtr &output, GeShape shape, Format format) { + input->SetShape(shape); + input->SetFormat(format); + output->SetShape(shape); + output->SetFormat(format); + int64_t input_shape_size = 0; + int64_t output_shape_size = 0; + ge::graphStatus input_graph_status = ge::TensorUtils::GetTensorSizeInBytes(*input, input_shape_size); + ge::graphStatus output_graph_status = 
ge::TensorUtils::GetTensorMemorySizeInBytes(*output, output_shape_size); + if ((input_graph_status != ge::GRAPH_SUCCESS) && (output_graph_status != ge::GRAPH_SUCCESS)) { + GELOGE(GRAPH_FAILED, "GetTensorSize failed!"); + return FAILED; + } + ge::TensorUtils::SetSize(*input, input_shape_size); + ge::TensorUtils::SetSize(*output, output_shape_size); + return SUCCESS; +} + +Status ProcessSingleOpInput(NodePtr &node_ptr, string &single_op_input_format) { + ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_input_format); auto op_desc = node_ptr->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); auto data_input = op_desc->MutableInputDesc(0); - GE_CHECK_NOTNULL(data_input); + auto data_output = op_desc->MutableOutputDesc(0); + ge::Format src_format = data_input->GetFormat(); + ge::DataType src_dt = data_input->GetDataType(); + ge::GeShape src_shape = data_input->GetShape(); + std::vector dst_shape_vec; + if (input_format == FORMAT_FRACTAL_NZ) { + formats::FormatTransferFractalNz transfer; + if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str()); + return FAILED; + } + ge::GeShape dst_shape(dst_shape_vec); + if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_NZ) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_NZ desc failed.", op_desc->GetName().c_str()); + return FAILED; + } + } else if (input_format == FORMAT_FRACTAL_Z) { + if (ProcessGemmFractalZ(src_shape, dst_shape_vec) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Op [%s] trans FRACTAL_Z Shape failed.", op_desc->GetName().c_str()); + return FAILED; + } + ge::GeShape dst_shape(dst_shape_vec); + if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_Z) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_Z desc failed.", op_desc->GetName().c_str()); + return FAILED; + } + } + // Gemm shape and format should be set at 
this stage, temporary solution. + auto out_anchor = node_ptr->GetOutDataAnchor(0); + for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { + GE_CHECK_NOTNULL(in_anchor); + auto index = static_cast(in_anchor->GetIdx()); + ge::NodePtr next_node = in_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(next_node); + auto next_op_desc = next_node->GetOpDesc(); + GE_CHECK_NOTNULL(next_op_desc); + auto input_desc = next_op_desc->MutableInputDesc(index); + GE_CHECK_NOTNULL(input_desc); + input_desc->SetFormat(input_format); + input_desc->SetShape(data_output->GetShape()); + } + return SUCCESS; +} + +Status ProcessSingleOpOutput(OpDescPtr &op_desc, string &single_op_output_format) { + ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_output_format); + auto data_input = op_desc->MutableInputDesc(0); + ge::Format src_format = data_input->GetFormat(); + ge::DataType src_dt = data_input->GetDataType(); + ge::GeShape src_shape = data_input->GetShape(); + std::vector dst_shape_vec; + if (input_format == FORMAT_FRACTAL_NZ) { + formats::FormatTransferFractalNz transfer; + if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str()); + return FAILED; + } + ge::GeShape dst_shape(dst_shape_vec); + data_input->SetShape(dst_shape); + data_input->SetFormat(FORMAT_FRACTAL_NZ); + } + return SUCCESS; +} + +Status ProcessDataNodeDynShape(NodePtr &node_ptr, bool &is_single_op) { + auto op_desc = node_ptr->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + std::string single_op_input_format; + if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_input_format", single_op_input_format))) { + if (ProcessSingleOpInput(node_ptr, single_op_input_format) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Process single op input [%s] failed.", node_ptr->GetName().c_str()); + return FAILED; + } + } bool set_fp16 = false; if 
(!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_fp16", set_fp16) || !set_fp16) { return SUCCESS; @@ -1269,9 +1375,16 @@ bool NeedUpdateOutputByOutputTypeParm(std::string &output_type, NodePtr &src_nod return false; } -Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type) { +Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type, bool &is_single_op) { auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); + std::string single_op_output_format; + if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_output_format", single_op_output_format))) { + if (ProcessSingleOpOutput(op_desc, single_op_output_format) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Process single op output [%s] failed.", node->GetName().c_str()); + return FAILED; + } + } ge::DataType output_data_type = ge::DT_FLOAT; for (const auto &in_anchor : node->GetAllInDataAnchors()) { @@ -1294,10 +1407,6 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type) { // Update datatype if (NeedUpdateOutputByOutputTypeParm(output_type, src_node, src_index, output_data_type)) { GELOGI("Enter into process output_type schedule"); - if (src_dtype == output_data_type) { - GELOGI("Data type is same ,no need to transfer."); - continue; - } net_output_input_desc->SetDataType(output_data_type); net_output_input_desc->SetOriginDataType(output_data_type); if (is_dynamic) { @@ -1977,6 +2086,7 @@ Status GraphPrepare::PrepareDynShape(ConstGraphPtr graph, const std::vectorGetDirectNode()) { - if (AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, group_id)) { - (void)AttrUtils::SetStr(node->GetOpDesc(), ATTR_NAME_HCCL_FUSED_GROUP, ""); - } - } ret = compute_graph->TopologicalSorting(); if (ret != SUCCESS) { GELOGE(ret, "Graph topological sort failed, ret:%u.", ret); @@ -2125,6 +2228,8 @@ Status GraphPrepare::Prepare(ConstGraphPtr graph, const std::vector &u return ret; } + GE_RETURN_IF_ERROR(RecordAIPPInfo(compute_graph_)); + 
GE_TIMESTAMP_START(OptimizeBeforeSubGraph); if (buffer_optimize_on != nullptr) { @@ -2204,7 +2309,7 @@ Status GraphPrepare::VerifyConstOp(const NodePtr &node) { } Status GraphPrepare::CheckUserInput(const std::vector &user_input) { - if (user_input.empty() || domi::GetContext().is_dynamic_input) { + if (domi::GetContext().is_dynamic_input) { return SUCCESS; } unsigned int node_num = 0; @@ -2373,14 +2478,8 @@ Status GraphPrepare::PrepareOptimize() { return ret; } // The constant for train is CONSTANTOP, and is CONSTANT for inference. They will be unified in future. - if (options_.train_graph_flag) { - for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { - // This can ensure that n is not a null pointer - if (n->GetOpDesc()->GetType() == CONSTANT) { - n->GetOpDesc()->SetType(CONSTANTOP); - } - } - } + TypeConversionOfConstant(); + ret = compute_graph_->TopologicalSorting(); if (ret != SUCCESS) { GELOGE(ret, "Graph topological sort failed, ret:%u.", ret); @@ -2391,6 +2490,27 @@ Status GraphPrepare::PrepareOptimize() { return SUCCESS; } + +void GraphPrepare::TypeConversionOfConstant() { + if (options_.train_graph_flag) { + GELOGD("trans CONSTANT to CONSTANTOP in train."); + for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { + // This can ensure that n is not a null pointer + if (n->GetOpDesc()->GetType() == CONSTANT) { + n->GetOpDesc()->SetType(CONSTANTOP); + } + } + } else { + GELOGD("trans CONSTANTOP to CONSTANT in inferrence."); + for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { + // This can ensure that n is not a null pointer + if (n->GetOpDesc()->GetType() == CONSTANTOP) { + n->GetOpDesc()->SetType(CONSTANT); + } + } + } +} + Status GraphPrepare::OptimizeForPreprocess() { GELOGI("Start optimize for preprocess."); PassManager original_graph_passes; @@ -2657,14 +2777,17 @@ Status GraphPrepare::OptimizeGraphBeforeSubGraph() { return SUCCESS; } Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input) { + 
compute_graph_->SetInputSize(user_input.size()); + if (user_input.empty()) { + return SUCCESS; + } + auto ret = CheckUserInput(user_input); if (ret != SUCCESS) { GELOGE(ret, "Check user input failed."); return ret; } - compute_graph_->SetInputSize(user_input.size()); - ret = UpdateInput(user_input); if (ret != SUCCESS) { GELOGE(ret, "UpdateInput fail, ret:%u", ret); @@ -2698,14 +2821,14 @@ Status GraphPrepare::UpdateInputOutputByOptions() { } if (node_ptr->GetType() == DATA) { - if (ProcessDataNodeDynShape(node_ptr) != SUCCESS) { + if (ProcessDataNodeDynShape(node_ptr, options_.is_single_op) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Process data node failed"); return FAILED; } } if (node_ptr->GetType() == ge::NETOUTPUT) { - if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype) != SUCCESS) { + if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype, options_.is_single_op) != SUCCESS) { GELOGE(INTERNAL_ERROR, "Process netoutput node failed"); return FAILED; } diff --git a/src/ge/graph/preprocess/graph_preprocess.h b/src/ge/graph/preprocess/graph_preprocess.h index 3c8646f7..b90caa86 100644 --- a/src/ge/graph/preprocess/graph_preprocess.h +++ b/src/ge/graph/preprocess/graph_preprocess.h @@ -49,6 +49,7 @@ class GraphPrepare { VarAccelerateCtrl &var_acc_ctrl, uint64_t session_id = 0); Status PrepareDynShape(ConstGraphPtr graph, const std::vector &user_input, ge::ComputeGraphPtr &compute_graph, uint64_t session_id = 0); + Status RecordAIPPInfo(ge::ComputeGraphPtr &compute_graph); Status PrepareRunningFormatRefiner(); void SetOptions(const GraphManagerOptions &options); Status GenerateInfershapeGraph(ConstGraphPtr graph); @@ -99,6 +100,8 @@ class GraphPrepare { bool ConfirmUseOpAndIndexByNode(const ge::NodePtr &var_node, const map> &confirm_ops, ge::NodePtr &use_node); Status GraphEquivalentTransformation(); + void TypeConversionOfConstant(); + ge::ComputeGraphPtr compute_graph_; GraphManagerOptions options_; }; diff --git 
a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 9ce87d38..22128394 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -15,29 +15,29 @@ */ #include "graph/preprocess/insert_op/ge_aipp_op.h" +#include #include #include #include #include -#include -#include "proto/insert_op.pb.h" -#include "graph/debug/ge_attr_define.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/node_utils.h" -#include "graph/utils/op_desc_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" +#include "base_insert_op.h" +#include "common/dynamic_aipp.h" +#include "common/ge/ge_util.h" +#include "common/util.h" +#include "external/graph/operator_factory.h" #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" #include "framework/common/op/ge_op_utils.h" #include "framework/common/types.h" #include "framework/omg/omg_inner_types.h" -#include "common/dynamic_aipp.h" -#include "common/ge/ge_util.h" -#include "common/util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/optimize/common/params.h" -#include "external/graph/operator_factory.h" -#include "base_insert_op.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" +#include "proto/insert_op.pb.h" #define SAVE_AIPP_ATTR(KEY, SAVE_TYPE) \ do { \ @@ -305,6 +305,14 @@ Status AippOp::GetAndCheckTarget(const ComputeGraphPtr &graph, int rank, NodePtr return PARAM_INVALID; } + // In scenario AIPP+CONV2D+POOLING, keep the aipp info to Data, since AIPP disappear after subgraph optimize + GeAttrValue::NAMED_ATTRS aipp_attr; + ConvertParamToAttr(aipp_attr); + if (!AttrUtils::SetNamedAttrs(data_node->GetOpDesc(), ATTR_NAME_AIPP, aipp_attr)) { + GELOGE(INTERNAL_ERROR, "Set name attrs for Data node failed. 
id: %d", rank); + return INTERNAL_ERROR; + } + if (aipp_params_->input_edge_idx_size() > 0) { for (auto edge_index : aipp_params_->input_edge_idx()) { edge_indexes.insert(edge_index); @@ -412,6 +420,8 @@ Status AippOp::ValidateParams() { "The parameter var_reci_chn_1 can not be configed repeatedly"); GE_CHK_BOOL_RET_STATUS(aipp_params_->var_reci_chn_2_size() <= 1, PARAM_INVALID, "The parameter var_reci_chn_2 can not be configed repeatedly"); + GE_CHK_BOOL_RET_STATUS(aipp_params_->var_reci_chn_3_size() <= 1, PARAM_INVALID, + "The parameter var_reci_chn_3 can not be configed repeatedly"); GE_CHK_BOOL_RET_STATUS(aipp_params_->matrix_r0c0_size() <= 1, PARAM_INVALID, "The parameter matrix_r0c0 can not be configed repeatedly"); @@ -520,8 +530,13 @@ void AippOp::SetCscDefaultValue() { void AippOp::SetDtcDefaultValue() { GE_CHECK_NOTNULL_JUST_RETURN(aipp_params_); CHECK_FALSE_EXEC(aipp_params_->var_reci_chn_0_size() > 0, aipp_params_->add_var_reci_chn_0(DEFAULT_VAR_RECI_CHN)); + GELOGD("var_reci_chn_0 is %f, size is %u.", DEFAULT_VAR_RECI_CHN, aipp_params_->var_reci_chn_0_size()); CHECK_FALSE_EXEC(aipp_params_->var_reci_chn_1_size() > 0, aipp_params_->add_var_reci_chn_1(DEFAULT_VAR_RECI_CHN)); + GELOGD("var_reci_chn_1 is %f, size is %u.", DEFAULT_VAR_RECI_CHN, aipp_params_->var_reci_chn_1_size()); CHECK_FALSE_EXEC(aipp_params_->var_reci_chn_2_size() > 0, aipp_params_->add_var_reci_chn_2(DEFAULT_VAR_RECI_CHN)); + GELOGD("var_reci_chn_2 is %f, size is %u.", DEFAULT_VAR_RECI_CHN, aipp_params_->var_reci_chn_2_size()); + CHECK_FALSE_EXEC(aipp_params_->var_reci_chn_3_size() > 0, aipp_params_->add_var_reci_chn_3(DEFAULT_VAR_RECI_CHN)); + GELOGD("var_reci_chn_3 is %f, size is %u.", DEFAULT_VAR_RECI_CHN, aipp_params_->var_reci_chn_3_size()); } Status AippOp::GenerateOpDesc(OpDescPtr op_desc) { @@ -555,6 +570,7 @@ Status AippOp::GenerateOpDesc(OpDescPtr op_desc) { void AippOp::ConvertParamToAttr(GeAttrValue::NAMED_ATTRS &aipp_attrs) { GE_CHECK_NOTNULL_JUST_RETURN(aipp_params_); 
SAVE_AIPP_ATTR(aipp_mode, GeAttrValue::INT); + SAVE_AIPP_ATTR(related_input_rank, GeAttrValue::INT); if (aipp_params_->aipp_mode() == domi::AippOpParams::static_) { SAVE_AIPP_ATTR(input_format, GeAttrValue::INT); @@ -582,12 +598,15 @@ void AippOp::ConvertParamToAttr(GeAttrValue::NAMED_ATTRS &aipp_attrs) { SAVE_AIPP_ATTR(mean_chn_0, GeAttrValue::INT); SAVE_AIPP_ATTR(mean_chn_1, GeAttrValue::INT); SAVE_AIPP_ATTR(mean_chn_2, GeAttrValue::INT); + SAVE_AIPP_ATTR(mean_chn_3, GeAttrValue::INT); SAVE_AIPP_ATTR(min_chn_0, GeAttrValue::FLOAT); SAVE_AIPP_ATTR(min_chn_1, GeAttrValue::FLOAT); SAVE_AIPP_ATTR(min_chn_2, GeAttrValue::FLOAT); + SAVE_AIPP_ATTR(min_chn_3, GeAttrValue::FLOAT); SAVE_AIPP_ATTR_LIST(var_reci_chn_0, GeAttrValue::FLOAT); SAVE_AIPP_ATTR_LIST(var_reci_chn_1, GeAttrValue::FLOAT); SAVE_AIPP_ATTR_LIST(var_reci_chn_2, GeAttrValue::FLOAT); + SAVE_AIPP_ATTR_LIST(var_reci_chn_3, GeAttrValue::FLOAT); SAVE_AIPP_ATTR_LIST(matrix_r0c0, GeAttrValue::INT); SAVE_AIPP_ATTR_LIST(matrix_r0c1, GeAttrValue::INT); SAVE_AIPP_ATTR_LIST(matrix_r0c2, GeAttrValue::INT); @@ -646,8 +665,13 @@ Status AippOp::CreateAippData(const ComputeGraphPtr &graph, const NodePtr &aipp_ TensorUtils::SetReuseInput(input_tensor, false); TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); + string node_name = kDynamicAippData; + // Only flush subgraph name + if (graph->GetParentGraph() != nullptr) { + node_name = graph->GetName() + "_" + node_name; + } // new add aipp_data ops for dynamic aipp param input - OpDescPtr op_desc_ptr_data = MakeShared(kDynamicAippData, AIPPDATA); + OpDescPtr op_desc_ptr_data = MakeShared(node_name, AIPPDATA); GE_CHECK_NOTNULL(op_desc_ptr_data); auto stat1 = op_desc_ptr_data->AddInputDesc(input_tensor); diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 52799156..49f4d3dc 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ 
b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -17,6 +17,8 @@ #include "graph/preprocess/insert_op/util_insert_aipp_op.h" #include #include +#include "common/dynamic_aipp.h" +#include "common/formats/utils/formats_trans_utils.h" #include "common/ge/ge_util.h" #include "common/op/ge_op_utils.h" #include "common/util.h" @@ -31,8 +33,6 @@ #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" -#include "common/dynamic_aipp.h" -#include "common/formats/utils/formats_trans_utils.h" using domi::AippOpParams; @@ -319,4 +319,158 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt } return SUCCESS; } + +Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map> &data_next_node_map) { + GELOGI("Start to get data and next node %s.", node->GetName().c_str()); + OpDescPtr data_op = node->GetOpDesc(); + GE_CHECK_NOTNULL(data_op); + if (!data_op->HasAttr(ATTR_NAME_AIPP)) { + GELOGI("there is not AIPP info for Data: %s.", data_op->GetName().c_str()); + return SUCCESS; + } + + std::unique_ptr aipp_params(new (std::nothrow) domi::AippOpParams()); + ge::GeAttrValue::NAMED_ATTRS aipp_attr; + GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, + "Data node do not contain param aipp!"); + GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); + + if (aipp_params->aipp_mode() != domi::AippOpParams::static_) { + return SUCCESS; + } + + for (auto out_data_anchor : node->GetAllOutDataAnchors()) { + GE_CHECK_NOTNULL(out_data_anchor); + auto peer_in_anchors = out_data_anchor->GetPeerInDataAnchors(); + for (auto peer_in_data_anchor : peer_in_anchors) { + GE_CHECK_NOTNULL(peer_in_data_anchor); + const auto &dst_node = peer_in_data_anchor->GetOwnerNode(); + const auto &dst_op = dst_node->GetOpDesc(); + GE_CHECK_NOTNULL(dst_op); + + if (dst_op->GetType() == AIPP || dst_op->GetType() == 
SWITCHN) { + auto data_iter = data_next_node_map.find(node); + if (data_iter == data_next_node_map.end()) { + std::set next_node_set; + next_node_set.insert(dst_node); + data_next_node_map[node] = next_node_set; + } else { + if (data_next_node_map[node].find(dst_node) == data_next_node_map[node].end()) { + data_next_node_map[node].insert(dst_node); + } + } + } + } + } + + return SUCCESS; +} + +Status InsertNewOpUtil::GetAllAipps(const NodePtr &node, std::vector &aipps) { + GE_CHECK_NOTNULL(node); + OpDescPtr op = node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + GELOGI("Get all aipp node from this node %s.", op->GetName().c_str()); + if (op->GetType() == AIPP) { + aipps.emplace_back(node); + } else if (op->GetType() == SWITCHN) { + for (auto out_data_anchor : node->GetAllOutDataAnchors()) { + GE_CHECK_NOTNULL(out_data_anchor); + auto peer_in_anchors = out_data_anchor->GetPeerInDataAnchors(); + if (peer_in_anchors.size() > 0) { + auto peer_in_anchor = peer_in_anchors.at(0); + GE_CHECK_NOTNULL(peer_in_anchor); + auto dst_aipp_node = peer_in_anchor->GetOwnerNode(); + if (dst_aipp_node->GetType() == AIPP) { + aipps.emplace_back(dst_aipp_node); + } + } + } + } + return SUCCESS; +} + +Status InsertNewOpUtil::RecordAIPPInfoToData(const ComputeGraphPtr &graph) { + GELOGI("Start to record aipp info to Data."); + std::map> data_next_node_map; + for (auto &node : graph->GetDirectNode()) { + if (node->GetType() == DATA) { + GE_RETURN_IF_ERROR(GetDataRelatedNode(node, data_next_node_map)); + } + } + + for (auto it : data_next_node_map) { + std::vector input_dims; + std::vector output_dims; + auto data_node = it.first; + std::set aipps_or_switchs = it.second; + if (aipps_or_switchs.size() != 1) { + GELOGW("The number of successors swith or aipp of data is more than 1"); + continue; + } + + std::vector aipps; + GE_RETURN_IF_ERROR(GetAllAipps(*aipps_or_switchs.begin(), aipps)); + GELOGI("RecordAIPPInfoToData: Data: name[%s], type[%s], batch size[%u]", data_node->GetName().c_str(), + 
data_node->GetType().c_str(), aipps.size()); + + for (auto aipp_it : aipps) { + string input; + string output; + GetInputOutputInfo(data_node, aipp_it, input, output); + input_dims.emplace_back(input); + output_dims.emplace_back(output); + } + + if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_INPUTS, input_dims)) { + GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_AIPP_INPUTS.c_str()); + return FAILED; + } + + if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_OUTPUTS, output_dims)) { + GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_AIPP_OUTPUTS.c_str()); + return FAILED; + } + } + + return SUCCESS; +} + +Status InsertNewOpUtil::GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_node, std::string &input, + std::string &output) { + GE_CHECK_NOTNULL(data_node); + GE_CHECK_NOTNULL(aipp_node); + OpDescPtr data_op = data_node->GetOpDesc(); + GE_CHECK_NOTNULL(data_op); + OpDescPtr aipp_op = aipp_node->GetOpDesc(); + GE_CHECK_NOTNULL(aipp_op); + + // aipp node's original output shape equals to original model data's shape + ConstGeTensorDescPtr output_desc = aipp_op->GetOutputDescPtr(0); + Format orig_format = output_desc->GetOriginFormat(); + DataType orig_data_type = output_desc->GetOriginDataType(); + std::string tensor_name = data_op->GetName(); + size_t dim_num = output_desc->GetOriginShape().GetDimNum(); + int64_t tensor_size = 0; + (void)TensorUtils::CalcTensorMemSize(output_desc->GetOriginShape(), orig_format, orig_data_type, tensor_size); + int64_t input_size = tensor_size; + input = TypeUtils::FormatToSerialString(orig_format) + ":" + TypeUtils::DataTypeToSerialString(orig_data_type) + ":" + + tensor_name + ":" + std::to_string(input_size) + ":" + std::to_string(dim_num) + ":" + + formats::JoinToString(output_desc->GetOriginShape().GetDims()); + + Format format = output_desc->GetFormat(); + DataType data_type = output_desc->GetDataType(); + std::string output_name = aipp_op->GetOutputNameByIndex(0); + size_t 
output_dim_num = output_desc->GetShape().GetDimNum(); + (void)TensorUtils::CalcTensorMemSize(output_desc->GetShape(), output_desc->GetFormat(), output_desc->GetDataType(), + tensor_size); + int64_t output_size = tensor_size; + output = TypeUtils::FormatToSerialString(format) + ":" + TypeUtils::DataTypeToSerialString(data_type) + ":" + + output_name + ":" + std::to_string(output_size) + ":" + std::to_string(output_dim_num) + ":" + + formats::JoinToString(output_desc->GetShape().GetDims()); + + GELOGI("GetInputOutputInfo: get data[%s] node related aipp[%s] node info, input[%s], output[%s].", + data_node->GetName().c_str(), aipp_node->GetName().c_str(), input.c_str(), output.c_str()); + return SUCCESS; +} } // namespace ge diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h index 70b57597..8dad2012 100644 --- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h +++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h @@ -44,6 +44,8 @@ class InsertNewOpUtil { Status UpdateDataNodeByAipp(const ComputeGraphPtr &graph); + Status RecordAIPPInfoToData(const ComputeGraphPtr &graph); + private: Status CheckPositionNotRepeat(); @@ -61,6 +63,9 @@ class InsertNewOpUtil { Status UpdatePrevNodeByAipp(NodePtr &node, std::set &switchns); Status UpdateDataBySwitchN(const NodePtr &switchn, const NodePtr &data); + Status GetDataRelatedNode(NodePtr &node, std::map> &data_next_node_map); + Status GetAllAipps(const NodePtr &node, std::vector &aipps); + Status GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_node, std::string &input, std::string &output); }; } // namespace ge diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc index 47d7701f..e063398f 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -16,12 +16,13 @@ #include "graph/preprocess/multi_batch_copy_graph.h" 
-#include #include #include +#include #include "common/formats/utils/formats_trans_utils.h" #include "common/ge/ge_util.h" +#include "common/util/error_manager/error_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" #include "framework/common/string_util.h" @@ -197,7 +198,9 @@ Status CheckDataShape(const std::vector &nodes) { } } if (unknown_shape_count == 0) { - GELOGE(PARAM_INVALID, "There are no unknown shape data, the dynamic batch/imagesize options will be ignored"); + ErrorManager::GetInstance().ATCReportErrMessage("E10055"); + GELOGE(PARAM_INVALID, + "Need unknow shape data when user set --dynamic_batch_size or --dynamic_image_size, please check."); return PARAM_INVALID; } @@ -426,7 +429,12 @@ NodePtr MultiBatchGraphCopyer::InsertShapeDataNode() { GELOGE(OUT_OF_MEMORY, "Failed to create shape data node, out of memory"); return nullptr; } - desc->SetName("ascend_mbatch_shape_data"); + string node_name = "ascend_mbatch_shape_data"; + // Only flush subgraph name + if (graph_->GetParentGraph() != nullptr) { + node_name = graph_->GetName() + "_" + node_name; + } + desc->SetName(node_name); desc->SetType(DATA); GeTensorDesc tensor_desc; @@ -468,31 +476,48 @@ Status MultiBatchGraphCopyer::CheckArguments() { return PARAM_INVALID; } if (shapes_.size() < kMinShapesCount) { - GELOGE(PARAM_INVALID, "The minimum batch-shapes count is %zu", kMinShapesCount); + ErrorManager::GetInstance().ATCReportErrMessage("E10050", {"shapesize", "minshapesize"}, + {std::to_string(shapes_.size()), std::to_string(kMinShapesCount)}); + GELOGE(PARAM_INVALID, + "Input parameter[--dynamic_batch_size or --dynamic_image_size]'s " + "value size [%zu] must be greater than [%zu].", + shapes_.size(), kMinShapesCount); return PARAM_INVALID; } if (shapes_.size() > kMaxShapesCount) { - GELOGE(PARAM_INVALID, "The max batch-shapes count is %zu", kMaxShapesCount); + ErrorManager::GetInstance().ATCReportErrMessage("E10051", {"shapesize", 
"maxshapesize"}, + {std::to_string(shapes_.size()), std::to_string(kMaxShapesCount)}); + GELOGE(PARAM_INVALID, + "Input parameter[--dynamic_batch_size or --dynamic_image_size]'s " + "value size [%zu] must be less than [%zu].", + shapes_.size(), kMaxShapesCount); return PARAM_INVALID; } std::set> shapes_set; size_t shape_size = shapes_.at(0).size(); for (auto &shape : shapes_) { if (shape_size != shape.size()) { - GELOGE(PARAM_INVALID, "All batch shapes size must be the same, first group's size is %zu and another's is %zu.", + ErrorManager::GetInstance().ATCReportErrMessage("E10052", {"shapesize1", "shapesize2"}, + {std::to_string(shape_size), std::to_string(shape.size())}); + GELOGE(PARAM_INVALID, + "Input parameter[--dynamic_batch_size or --dynamic_image_size]'s " + "value size must be same, first group's size is %zu and another's is %zu.", shape_size, shape.size()); return PARAM_INVALID; } for (auto dim : shape) { if (dim <= 0) { - GELOGE(PARAM_INVALID, "Invalid dim %ld, all dims must more than 0", dim); + ErrorManager::GetInstance().ATCReportErrMessage("E10053", {"dim"}, {std::to_string(dim)}); + GELOGE(PARAM_INVALID, "Invalid dim %ld, all dims must be greater than 0", dim); return PARAM_INVALID; } } shapes_set.insert(shape); } if (shapes_set.size() != shapes_.size()) { - GELOGE(PARAM_INVALID, "There are duplicated batch-shapes, please check"); + ErrorManager::GetInstance().ATCReportErrMessage("E10054"); + GELOGE(PARAM_INVALID, + "Input parameter[--dynamic_batch_size or --dynamic_image_size] exist duplicate shapes, please check"); return PARAM_INVALID; } return SUCCESS; @@ -627,11 +652,11 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { switchn_desc->SetType(SWITCHN); GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex)); - if (switchn_desc->AddInputDesc(tensor) != GRAPH_SUCCESS) { // data + if (switchn_desc->AddInputDesc("data", tensor) != GRAPH_SUCCESS) { // data return OUT_OF_MEMORY; } GeTensorDesc pred_tensor; - if 
(switchn_desc->AddInputDesc(pred_tensor) != GRAPH_SUCCESS) { // pred + if (switchn_desc->AddInputDesc("pred_value", pred_tensor) != GRAPH_SUCCESS) { // pred return OUT_OF_MEMORY; } for (size_t i = 0; i < shapes_.size(); ++i) { @@ -647,7 +672,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &data) { GELOGE(INTERNAL_ERROR, "Failed to add attr value on output %zu tensor", i); return INTERNAL_ERROR; } - if (switchn_desc->AddOutputDesc(tensor) != GRAPH_SUCCESS) { + if (switchn_desc->AddOutputDesc("output" + std::to_string(i), tensor) != GRAPH_SUCCESS) { GELOGE(GRAPH_FAILED, "Opdesc AddOutputDesc failed"); return GRAPH_FAILED; } @@ -914,5 +939,46 @@ Status ProcessMultiBatch(ComputeGraphPtr &graph) { } return copyer.CopyGraph(); } + +Status GetDynamicOutputShape(ComputeGraphPtr &graph) { + GELOGI("Start to get dynamic output dynamic batch shape msg"); + std::vector dynamic_output_dims; + if (graph == nullptr) { + GELOGE(PARAM_INVALID, "Graph is null ,para is invalid"); + return PARAM_INVALID; + } + for (auto &node : graph->GetAllNodes()) { + if (node->GetType() == NETOUTPUT) { + auto netoutput_desc = node->GetOpDesc(); + auto inputnode_to_netoutput = node->GetInAllNodes(); + for (size_t j = 0; j < inputnode_to_netoutput.size(); j++) { + bool ret = false; + (void)AttrUtils::GetBool(inputnode_to_netoutput.at(j)->GetOpDesc(), ATTR_INSERT_BY_MBATCH, ret); + if (inputnode_to_netoutput.at(j)->GetType() == MERGE && ret) { + GELOGI("Find the merge node %s with mbatch attr", inputnode_to_netoutput.at(j)->GetName().c_str()); + for (size_t i = 0; i < inputnode_to_netoutput.at(j)->GetInNodes().size(); i++) { + auto input_desc = inputnode_to_netoutput.at(j)->GetOpDesc(); + auto input_tensor_desc = input_desc->GetInputDesc(i); + auto shape_msg = input_tensor_desc.GetShape().ToString(); + std::string output_shape = std::to_string(i) + "," + std::to_string(j) + "," + shape_msg; + GELOGI("The shape msg in dynamic batch is %s", output_shape.c_str()); + 
dynamic_output_dims.emplace_back(output_shape); + } + } + } + if (dynamic_output_dims.size() > 0) { + if (!AttrUtils::SetListStr(netoutput_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims)) { + GELOGE(FAILED, "Set dynamic output dims attr failed"); + return FAILED; + } + return SUCCESS; + } + GELOGI("Can not find the merge node with mbatch attr"); + return SUCCESS; + } + } + GELOGW("There are no netoutput in graph"); + return SUCCESS; +} } // namespace multibatch } // namespace ge diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.h b/src/ge/graph/preprocess/multi_batch_copy_graph.h index b3642dbd..2500645f 100644 --- a/src/ge/graph/preprocess/multi_batch_copy_graph.h +++ b/src/ge/graph/preprocess/multi_batch_copy_graph.h @@ -27,6 +27,7 @@ namespace ge { namespace multibatch { Status ProcessMultiBatch(ComputeGraphPtr &graph); +Status GetDynamicOutputShape(ComputeGraphPtr &graph); enum NodeStatus { kNodeInBatchBranch, diff --git a/src/ge/host_kernels/slice_kernel.cc b/src/ge/host_kernels/slice_kernel.cc index ac2d5cc3..1d7d90c2 100644 --- a/src/ge/host_kernels/slice_kernel.cc +++ b/src/ge/host_kernels/slice_kernel.cc @@ -42,7 +42,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetDataType(); // check data type of begin and size if (begin->GetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { - GELOGE(PARAM_INVALID, "Data type of begin and size for slice are not DT_INT32."); + GELOGW("Data type of begin and size for slice are not DT_INT32."); return NOT_CHANGED; } @@ -75,7 +75,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetShape(); size_t dim_size = x_shape.GetDimNum(); if (dim_size != begin_size || dim_size != size_size) { - GELOGE(PARAM_INVALID, "Data type of begin and size for slice are not DT_INT32."); + GELOGW("Data type of begin and size for slice are not DT_INT32."); return NOT_CHANGED; } @@ -103,13 +103,19 @@ Status 
SliceKernel::Compute(const OpDescPtr attr, const std::vector(output_tensor_desc); if (output_ptr == nullptr) { - GELOGE(MEMALLOC_FAILED, "make_shared ge::GeTensor failed, node name %s.", attr->GetName().c_str()); + GELOGW("make_shared ge::GeTensor failed, node name %s.", attr->GetName().c_str()); return NOT_CHANGED; } - Status ret = OpUtils::SetOutputSliceData(data, static_cast(data_size), data_type, input_dims, begin_vec, - output_dims, output_ptr.get(), stride_vec); + + Status ret = CheckOutputDims(output_dims, attr); + if (ret != SUCCESS) { + return ret; + } + + ret = OpUtils::SetOutputSliceData(data, static_cast(data_size), data_type, input_dims, begin_vec, + output_dims, output_ptr.get(), stride_vec); if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "SetOutputSliceData failed."); + GELOGW("SetOutputSliceData failed."); return NOT_CHANGED; } v_output.push_back(output_ptr); @@ -117,5 +123,16 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector &output_dims, const OpDescPtr attr) { + // check dim not all less than 0 + for (auto dim : output_dims) { + if (dim > 0) { + return SUCCESS; + } + } + GELOGW("all output dim <=0, can't be processed. 
op_name : %s", attr->GetName().c_str()); + return NOT_CHANGED; +} + REGISTER_KERNEL(SLICE, SliceKernel); } // namespace ge diff --git a/src/ge/host_kernels/slice_kernel.h b/src/ge/host_kernels/slice_kernel.h index 582e140a..1a374096 100644 --- a/src/ge/host_kernels/slice_kernel.h +++ b/src/ge/host_kernels/slice_kernel.h @@ -26,6 +26,8 @@ class SliceKernel : public Kernel { public: Status Compute(const OpDescPtr attr, const std::vector &input, vector &v_output) override; + + Status CheckOutputDims(const std::vector &output_dims, const OpDescPtr attr); }; } // namespace ge diff --git a/src/ge/host_kernels/unsqueeze_kernel.cc b/src/ge/host_kernels/unsqueeze_kernel.cc new file mode 100644 index 00000000..d66a3e2c --- /dev/null +++ b/src/ge/host_kernels/unsqueeze_kernel.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "host_kernels/unsqueeze_kernel.h" +#include +#include "common/ge_inner_error_codes.h" +#include "common/op/ge_op_utils.h" +#include "common/types.h" +#include "framework/common/debug/ge_log.h" +#include "host_kernels/kernel_utils.h" +#include "inc/kernel_factory.h" + +namespace ge { +namespace { +constexpr uint32_t kInputDescIndex = 0; +constexpr uint32_t kOutputDescIndex = 0; +constexpr size_t kSqueezeInputSize = 1; +constexpr size_t kSqueezeOutputSize = 1; +} // namespace + +Status UnsqueezeKernel::Compute(const NodePtr &node_ptr) { + GE_CHECK_NOTNULL(node_ptr); + if (!KernelUtils::CheckFormatSupported(node_ptr)) { + GELOGW("CheckFormatSupported failed"); + return NOT_CHANGED; + } + return SUCCESS; +} + +Status UnsqueezeKernel::Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, + std::vector &v_output) { + GE_CHECK_NOTNULL(op_desc_ptr); + GELOGD("SqueezeKernel in: node[%s]", op_desc_ptr->GetName().c_str()); + bool is_check_failed = ((op_desc_ptr->GetInputsSize() != kSqueezeInputSize) || + (op_desc_ptr->GetOutputsSize() != kSqueezeOutputSize) || (input.size() != kSqueezeInputSize)); + if (is_check_failed) { + GELOGW("Size check fail, node[%s] inputs size:%zu, outputs size:%zu, input size:%zu", + op_desc_ptr->GetName().c_str(), op_desc_ptr->GetInputsSize(), op_desc_ptr->GetOutputsSize(), input.size()); + return NOT_CHANGED; + } + + auto tensor_desc = op_desc_ptr->GetOutputDesc(kOutputDescIndex); + GeTensorPtr output_ptr = MakeShared(tensor_desc); + GE_CHECK_NOTNULL(output_ptr); + + auto input_tensor = input.at(kInputDescIndex); + GE_CHECK_NOTNULL(input_tensor); + + if (output_ptr->SetData(input_tensor->GetData()) != GRAPH_SUCCESS) { + GELOGW("Compute: SetData failed"); + } + v_output.emplace_back(output_ptr); + GELOGD("UnsqueezeKernel success: node[%s]", op_desc_ptr->GetName().c_str()); + return SUCCESS; +} +REGISTER_KERNEL(UNSQUEEZE, UnsqueezeKernel); +} // namespace ge diff --git a/src/ge/host_kernels/unsqueeze_kernel.h 
b/src/ge/host_kernels/unsqueeze_kernel.h new file mode 100644 index 00000000..c676586f --- /dev/null +++ b/src/ge/host_kernels/unsqueeze_kernel.h @@ -0,0 +1,32 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_FOLDING_KERNEL_UNSQUEEZE_KERNEL_H_ +#define GE_GRAPH_PASSES_FOLDING_KERNEL_UNSQUEEZE_KERNEL_H_ + +#include +#include "inc/kernel.h" + +namespace ge { +class UnsqueezeKernel : public Kernel { + public: + Status Compute(const NodePtr &node_ptr) override; + Status Compute(const ge::OpDescPtr op_desc_ptr, const std::vector &input, + std::vector &v_output) override; +}; +} // namespace ge + +#endif // GE_GRAPH_PASSES_FOLDING_KERNEL_UNSQUEEZE_KERNEL_H_ diff --git a/src/ge/hybrid/executor/hybrid_execution_context.h b/src/ge/hybrid/executor/hybrid_execution_context.h index f7e7af88..07a6fabf 100644 --- a/src/ge/hybrid/executor/hybrid_execution_context.h +++ b/src/ge/hybrid/executor/hybrid_execution_context.h @@ -41,12 +41,12 @@ struct GraphExecutionContext { std::unordered_map node_states; rtStream_t stream = nullptr; std::unique_ptr callback_manager; - NpuMemoryAllocator *allocator; + NpuMemoryAllocator *allocator = nullptr; mutable std::unique_ptr profiler; bool trace_enabled = false; int profiling_level = 0; bool dump_enabled = false; - Status status; + Status status = SUCCESS; std::mutex mu_; NodeStatePtr GetOrCreateNodeState(const NodePtr &node); diff --git 
a/src/ge/hybrid/executor/hybrid_model_async_executor.cc b/src/ge/hybrid/executor/hybrid_model_async_executor.cc index 2999daba..bd5d77f7 100644 --- a/src/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/src/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -316,4 +316,4 @@ Status HybridModelAsyncExecutor::CopyOutputs(const std::vector &out return SUCCESS; } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/executor/hybrid_model_executor.cc b/src/ge/hybrid/executor/hybrid_model_executor.cc index 97dc5a36..856b4483 100644 --- a/src/ge/hybrid/executor/hybrid_model_executor.cc +++ b/src/ge/hybrid/executor/hybrid_model_executor.cc @@ -106,7 +106,6 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context context.execution_queue.Restart(); GE_CHK_STATUS_RET_NOLOG(context.callback_manager->Init()); - // TODO do not re-assign Consts every run for (auto const_node : model.GetConstNodes()) { auto weight_tensor = model.GetWeight(const_node); GE_CHECK_NOTNULL(weight_tensor); diff --git a/src/ge/hybrid/executor/rt_callback_manager.cc b/src/ge/hybrid/executor/rt_callback_manager.cc index 1787cf77..6be8da31 100644 --- a/src/ge/hybrid/executor/rt_callback_manager.cc +++ b/src/ge/hybrid/executor/rt_callback_manager.cc @@ -72,7 +72,6 @@ Status CallbackManager::CallbackProcess(rtContext_t context) { return RT_FAILED; } - // TODO reuse event GE_CHK_RT(rtEventDestroy(event)); auto cb_func = entry.second.first; @@ -105,10 +104,10 @@ void CallbackManager::RtCallbackFunc(void *data) { } Status CallbackManager::RegisterCallback(const std::function &callback) { - auto *func = new (std::nothrow) std::function(callback); + auto func = std::unique_ptr>(new (std::nothrow) std::function(callback)); GE_CHECK_NOTNULL(func); GELOGD("Callback registered"); - return RegisterCallback(RtCallbackFunc, func); + return RegisterCallback(RtCallbackFunc, func.release()); } } // namespace hybrid -} // 
namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/executor/worker/execution_engine.cc b/src/ge/hybrid/executor/worker/execution_engine.cc index f4657cd4..9e656139 100644 --- a/src/ge/hybrid/executor/worker/execution_engine.cc +++ b/src/ge/hybrid/executor/worker/execution_engine.cc @@ -134,6 +134,7 @@ Status ExecutionEngine::ExecutionProcess() { auto shared_task_context = shared_ptr(task_context.release()); auto cb = std::shared_ptr(new (std::nothrow) NodeDoneCallback(context_, shared_task_context)); + GE_CHECK_NOTNULL(cb); auto callback = [&, cb]() { auto ret = cb->OnNodeDone(); if (ret != SUCCESS) { @@ -198,4 +199,4 @@ Status ExecutionEngine::PropagateOutputs(const NodeItem &node_item, TaskContext return SUCCESS; } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/executor/worker/task_compile_engine.cc b/src/ge/hybrid/executor/worker/task_compile_engine.cc index 07e70a93..f6434ffa 100644 --- a/src/ge/hybrid/executor/worker/task_compile_engine.cc +++ b/src/ge/hybrid/executor/worker/task_compile_engine.cc @@ -89,6 +89,7 @@ Status TaskCompileEngine::CompileProcess() { } auto entry = unique_ptr(new (std::nothrow) ResultQueueEntry()); + GE_CHECK_NOTNULL(entry); entry->node_state = node_state; auto node_item = *node_state->node_item; @@ -183,4 +184,4 @@ Status TaskCompileEngine::DistributeCompiledTasks() { return ret; } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/model/hybrid_model_builder.cc b/src/ge/hybrid/model/hybrid_model_builder.cc index ce220bde..190890b7 100644 --- a/src/ge/hybrid/model/hybrid_model_builder.cc +++ b/src/ge/hybrid/model/hybrid_model_builder.cc @@ -66,7 +66,6 @@ Status HybridModelBuilder::Build() { GE_CHK_STATUS_RET(TransAllVarData(), "[%s] Failed to trans all var data", GetGraphName()); GE_CHK_STATUS_RET(CopyVarData(), "[%s] Failed to copy var data", GetGraphName()); 
GE_CHK_STATUS_RET(InitModelMem(), "[%s] Failed to init memory", GetGraphName()); - // TODO VAR_ATTR_VAR_IS_BROADCAST ??? GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName()); GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName()); GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName()); @@ -303,7 +302,7 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) { Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) { const auto &parent_node = graph.GetParentNode(); - const NodePtr &net_output_node = graph.FindNode(NODE_NAME_NET_OUTPUT); + const NodePtr &net_output_node = graph.FindFirstNodeMatchType(NETOUTPUT); GE_CHECK_NOTNULL(net_output_node); const auto &net_output_desc = net_output_node->GetOpDesc(); GE_CHECK_NOTNULL(net_output_desc); @@ -776,7 +775,7 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node, auto src_graph = NodeUtils::GetSubgraph(*src_wrapped_node, kSubgraphIndex); GE_CHECK_NOTNULL(src_graph); - auto src_net_output_node = src_graph->FindNode(NODE_NAME_NET_OUTPUT); + auto src_net_output_node = src_graph->FindFirstNodeMatchType(NETOUTPUT); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(src_net_output_node == nullptr, return INTERNAL_ERROR, "Failed to find NetOutput in subgraph: %s", src_graph->GetName().c_str()); auto net_output_desc = src_net_output_node->GetOpDesc(); @@ -843,7 +842,7 @@ Status HybridModelBuilder::ParsePartitionedCall(NodeItem &node_item) { GELOGD("Start to parse outputs of node: %s", node_item.NodeName().c_str()); auto subgraph = NodeUtils::GetSubgraph(*node_item.node, kSubgraphIndex); GE_CHECK_NOTNULL(subgraph); - auto net_output_node = subgraph->FindNode(NODE_NAME_NET_OUTPUT); + auto net_output_node = subgraph->FindFirstNodeMatchType(NETOUTPUT); GE_CHECK_NOTNULL(net_output_node); auto net_output_desc = net_output_node->GetOpDesc(); GE_CHECK_NOTNULL(net_output_desc); @@ -953,4 +952,4 @@ Status 
HybridModelBuilder::CopyVarData() { return SUCCESS; } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc new file mode 100644 index 00000000..d5c3c03c --- /dev/null +++ b/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc @@ -0,0 +1,204 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" +#include "framework/common/util.h" +#include "framework/common/fmk_error_codes.h" +#include "framework/common/debug/log.h" + +namespace ge { +namespace hybrid { +namespace { +// if dim count is not reach kMaxShapeDims(8), use INT64_MIN to mark dim end. 
+constexpr int64_t kDimEndFlag = INT64_MIN; +} // namespace + +Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { + GELOGI("Node[%s] parse ext info start.", node_name_.c_str()); + if (ext_info.empty()) { + GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str()); + return PARAM_INVALID; + } + + ext_info_len_ = ext_info.size(); + ext_info_.reset(new (std::nothrow) uint8_t[ext_info_len_]); + GE_CHECK_NOTNULL(ext_info_); + + (void)memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()); + + input_shape_and_type_.clear(); + output_shape_and_type_.clear(); + + auto ext_info_data = ext_info_.get(); + size_t offset = 0; + while (offset + sizeof(AicpuExtInfo) <= ext_info_len_) { + auto aicpu_ext_info = reinterpret_cast(ext_info_data + offset); + GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen); + switch (aicpu_ext_info->infoType) { + case aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE: + GE_CHK_STATUS_RET(ParseExtShapeType(aicpu_ext_info), "Parse ext shape type failed."); + break; + case aicpu::FWKAdapter::FWK_ADPT_EXT_INPUT_SHAPE: + GE_CHK_STATUS_RET(ParseExtInputShape(aicpu_ext_info), "Parse ext input shape failed."); + break; + case aicpu::FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE: + GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "Parse ext output shape failed."); + break; + default: + GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoType, + aicpu_ext_info->infoLen); + break; + } + offset += sizeof(AicpuExtInfo); + offset += aicpu_ext_info->infoLen; + } + + GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, PARAM_INVALID, + "Node[%s] ext_info format error, parse not reach end, offset=%zu, ext_info_len=%zu.", + node_name_.c_str(), offset, ext_info_len_); + GELOGI("Node[%s] parse ext info end.", node_name_.c_str()); + return SUCCESS; +} + +Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { + 
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), PARAM_INVALID, + "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", node_name_.c_str(), + sizeof(int32_t), aicpu_ext_info->infoLen); + + auto type = reinterpret_cast(aicpu_ext_info->infoMsg); + + GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, PARAM_INVALID, + "Node[%s] parse ext shape type failed as need %d but %d.", node_name_.c_str(), unknown_type_, + *type); + GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); + return SUCCESS; +} + +Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) { + auto need_len = input_num_ * sizeof(AicpuShapeAndType); + GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID, + "Node[%s] parse ext input shape failed as infoLen must be " + "input_num[%zu]*sizeof(ShapeAndType)[%zu] but %u.", + node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); + + auto input = reinterpret_cast(aicpu_ext_info->infoMsg); + + for (uint32_t index = 0; index < input_num_; ++index) { + input_shape_and_type_.emplace_back(&input[index]); + } + GELOGI("Node[%s] parse ext input shape success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); + return SUCCESS; +} + +Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { + if (unknown_type_ == DEPEND_COMPUTE) { + GELOGD("Node[%s] is depend compute type no need ext output shape, ignore it, infoLen=%u.", node_name_.c_str(), + aicpu_ext_info->infoLen); + return SUCCESS; + } + auto need_len = output_num_ * sizeof(AicpuShapeAndType); + GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID, + "Node[%s] parse ext output shape failed as infoLen must be " + "output_num[%zu]*sizeof(ShapeAndType)[%zu] but %u.", + node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); + + auto output = reinterpret_cast(aicpu_ext_info->infoMsg); + 
for (uint32_t index = 0; index < output_num_; ++index) { + output_shape_and_type_.emplace_back(&output[index]); + } + GELOGI("Node[%s] parse ext output shape success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); + return SUCCESS; +} + +Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const GeTensorDesc &input_desc) { + GE_CHECK_LE(input_index, input_num_); + const auto &shape = input_desc.GetShape(); + + GE_CHK_STATUS_RET(UpdateShapeAndType(shape, input_desc.GetDataType(), input_shape_and_type_[input_index]), + "Node[%s] input[%u] update input shape and type failed.", node_name_.c_str(), input_index); + return SUCCESS; +} + +Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc) { + GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), INTERNAL_ERROR, + "Node[%s] is depend compute is no need update output shape and type by ext.", + node_name_.c_str()); + GE_CHECK_LE(output_index, output_num_); + auto shape = output_desc.GetShape(); + + // shape range need use range update shape + if (unknown_type_ == DEPEND_SHAPE_RANGE) { + std::vector> range; + auto range_ret = output_desc.GetShapeRange(range); + GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR, + "Node[%s] is shape range type but get GetShapeRange failed, ret=%u.", node_name_.c_str(), + range_ret); + for (size_t k = 0; k < range.size(); ++k) { + if (shape.GetDim(k) < 0 && k < range.size()) { + GELOGD("Node[%s] output[%u] update dim[%zu] from %ld to range max %ld.", node_name_.c_str(), output_index, k, + shape.GetDim(k), range[k].second); + shape.SetDim(k, range[k].second); + } + } + } + + return UpdateShapeAndType(shape, output_desc.GetDataType(), output_shape_and_type_[output_index]); +} + +Status AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type) { + GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), INTERNAL_ERROR, + "Node[%s] is depend 
compute type can not get output shape and type by ext.", + node_name_.c_str()); + GetShapeAndType(output_shape_and_type_[output_index], shape, data_type); + return SUCCESS; +} + +Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType data_type, + AicpuShapeAndType *shape_and_type) { + auto dim_num = shape.GetDimNum(); + if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) { + GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", dim_num, + aicpu::FWKAdapter::kMaxShapeDims); + return PARAM_INVALID; + } + size_t index = 0; + for (; index < dim_num; ++index) { + shape_and_type->dims[index] = shape.GetDim(index); + } + if (index < aicpu::FWKAdapter::kMaxShapeDims) { + shape_and_type->dims[index] = kDimEndFlag; + } + + // now only support update shape, type is not support + return SUCCESS; +} + +void AicpuExtInfoHandler::GetShapeAndType(const AicpuShapeAndType *shape_and_type, GeShape &shape, + DataType &data_type) { + std::vector dims; + for (uint32_t index = 0; index < aicpu::FWKAdapter::kMaxShapeDims; ++index) { + auto tmpDim = shape_and_type->dims[index]; + if (tmpDim == kDimEndFlag) { + break; + } + dims.emplace_back(tmpDim); + } + data_type = static_cast(shape_and_type->type); + shape = std::move(GeShape(dims)); +} + +} // namespace hybrid +} // namespace ge \ No newline at end of file diff --git a/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h b/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h new file mode 100644 index 00000000..e96d794c --- /dev/null +++ b/src/ge/hybrid/node_executor/aicpu/aicpu_ext_info.h @@ -0,0 +1,71 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_HYBRID_AICPU_EXT_INFO_H_ +#define GE_HYBRID_AICPU_EXT_INFO_H_ + +#include "external/ge/ge_api_error_codes.h" +#include "cce/fwk_adpt_struct.h" +#include "graph/op_desc.h" +#include "graph/ge_tensor.h" + +namespace ge { +namespace hybrid { + +using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType; +using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; + +class AicpuExtInfoHandler { + public: + AicpuExtInfoHandler(std::string node_name, uint32_t input_num, uint32_t output_num, UnknowShapeOpType unknown_type) + : node_name_(std::move(node_name)), input_num_(input_num), output_num_(output_num), unknown_type_(unknown_type) {} + + ~AicpuExtInfoHandler() = default; + + uint8_t *GetExtInfo() const { return ext_info_.get(); } + size_t GetExtInfoLen() const { return ext_info_len_; } + + Status Parse(const std::string &ext_info); + + Status UpdateInputShapeAndType(uint32_t input_index, const GeTensorDesc &input_desc); + + Status UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc); + + Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); + + private: + Status ParseExtShapeType(AicpuExtInfo *aicpu_ext_info); + Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); + Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); + + static Status UpdateShapeAndType(const GeShape &shape, DataType data_type, AicpuShapeAndType *shape_and_type); + + static void GetShapeAndType(const AicpuShapeAndType *shape_and_type, GeShape &shape, DataType &data_type); + + private: + const std::string 
node_name_; + const uint32_t input_num_; + const uint32_t output_num_; + UnknowShapeOpType unknown_type_; + + std::unique_ptr ext_info_; + size_t ext_info_len_ = 0; + std::vector input_shape_and_type_; + std::vector output_shape_and_type_; +}; +} // namespace hybrid +} // namespace ge +#endif // GE_HYBRID_AICPU_EXT_INFO_H_ \ No newline at end of file diff --git a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 2698f79e..372f35f5 100644 --- a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -16,32 +16,22 @@ #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" #include "common/formats/formats.h" +#include "aicpu/common/aicpu_task_struct.h" #include "graph/load/new_model_manager/model_manager.h" -#include "hybrid/common/npu_memory_allocator.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/model/hybrid_model.h" #include "init/gelib.h" namespace ge { namespace hybrid { -using aicpu::FWKAdapter::ExtInfo; namespace { // mem need release constexpr uint64_t kReleaseFlag = 1; - -// max dim count is 8. -constexpr uint32_t kMaxDimCount = 8; - -// if dim count is not reach kMaxDimCount, use INT64_MIN to mark dim end. 
-constexpr int64_t kDimEndFlag = INT64_MIN; - -struct MaxShape { - int64_t dims[kMaxDimCount] = {0}; -}; } // namespace REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, AiCpuNodeExecutor); +REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_CUSTOM, AiCpuNodeExecutor); -Status AicpuTfNodeTask::AllocTensorBuffer(size_t size, std::unique_ptr &tensor_buffer) { +Status AicpuNodeTaskBase::AllocTensorBuffer(size_t size, std::unique_ptr &tensor_buffer) { auto allocator = NpuMemoryAllocator::GetAllocator(); GE_CHECK_NOTNULL(allocator); tensor_buffer = TensorBuffer::Create(allocator, size); @@ -49,94 +39,179 @@ Status AicpuTfNodeTask::AllocTensorBuffer(size_t size, std::unique_ptr 0) { - ext_info_size += sizeof(ExtInfo) + input_num_ * sizeof(MaxShape); - ++ext_info_num_; +Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info) { + GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info), + "Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", node_name_.c_str(), + kernel_ext_info.size()); + + // copy task args buf + GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_ext_info.size(), ext_info_addr_dev_), + "Node[%s] alloc kernel_ext_info buf failed, size=%zu", node_name_.c_str(), kernel_ext_info.size()); + + // if no input and no output(DEPEND_COMPUTE equal no output), copy once, or else copy when update args. 
+ if (node_item_->num_inputs == 0 && ((unknown_type_ == DEPEND_COMPUTE) || (node_item_->num_outputs == 0))) { + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), kernel_ext_info.data(), + kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); } + return SUCCESS; +} - // exit info 2:output shape - if ((unknown_type_ != DEPEND_COMPUTE) && (output_num_ > 0)) { - ext_info_size += sizeof(ExtInfo) + output_num_ * sizeof(MaxShape); - ++ext_info_num_; +Status AicpuNodeTaskBase::UpdateOutputShapeFromExtInfo() { + if (node_item_->num_outputs == 0) { + GELOGI("Task [%s] output_num is 0, no need update output shape.", node_name_.c_str()); + return SUCCESS; + } + // copy to host buf + GE_CHK_RT_RET(rtMemcpy(aicpu_ext_handle_.GetExtInfo(), aicpu_ext_handle_.GetExtInfoLen(), + ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); + + for (auto i = 0; i < node_item_->num_outputs; ++i) { + GeShape shape; + // not support update data type now, just for param + DataType data_type; + aicpu_ext_handle_.GetOutputShapeAndType(i, shape, data_type); + auto output_desc = node_item_->op_desc->MutableOutputDesc(i); + GE_CHECK_NOTNULL(output_desc); + GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(shape, i, output_desc), "Update node %s [%d]th output shape failed.", + node_name_.c_str(), i); } + return SUCCESS; +} - GE_CHK_STATUS_RET(AllocTensorBuffer(ext_info_size, ext_info_addr_dev_), - "Node %s alloc buffer for ext info failed, size=%zu.", node_->GetName().c_str(), ext_info_size); +Status AicpuNodeTaskBase::UpdateShapeToOutputDesc(const GeShape &shape_new, int32_t output_index, + GeTensorDescPtr &output_desc) { + auto shape_old = output_desc->GetShape(); + output_desc->SetShape(shape_new); + GELOGI("Update node[%s] out[%d] shape from %s to %s.", node_name_.c_str(), output_index, shape_old.ToString().c_str(), + shape_new.ToString().c_str()); - auto ext_info_dev_base = reinterpret_cast(ext_info_addr_dev_->GetData()); - 
ext_info_addr_host_.reset(new (std::nothrow) uint8_t[ext_info_size]); - GE_CHECK_NOTNULL(ext_info_addr_host_); + auto origin_shape_old = output_desc->GetOriginShape(); + auto origin_format = output_desc->GetOriginFormat(); + auto format = output_desc->GetFormat(); + if (origin_format == format) { + output_desc->SetOriginShape(shape_new); + return SUCCESS; + } + // if format is not same need convert shape + std::vector origin_dims_new; + auto trans_ret = + formats::TransShape(format, shape_new.GetDims(), output_desc->GetDataType(), origin_format, origin_dims_new); + GE_CHK_STATUS_RET(trans_ret, + "Node[%s] out[%d] originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", + node_name_.c_str(), output_index, origin_format, format, shape_new.ToString().c_str()); + auto origin_shape_new = GeShape(origin_dims_new); + output_desc->SetOriginShape(origin_shape_new); + GELOGI("Node[%s] out[%d] originFormat[%d] is not same as format[%d], need update from %s ro %s.", node_name_.c_str(), + output_index, origin_format, format, origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); + return SUCCESS; +} - size_t ext_info_type_offset = ext_info_num_ * sizeof(ExtInfo); - size_t ext_info_input_shape_offset = ext_info_type_offset + sizeof(uint32_t); +Status AicpuNodeTaskBase::UpdateExtInfo() { + GELOGI("Node[%s] update ext info begin, unknown_type=%d.", node_name_.c_str(), unknown_type_); + if (node_item_->num_inputs == 0 && node_item_->num_outputs == 0) { + GELOGI("Node[%s] has no input and output, no need update ext info.", node_name_.c_str()); + return SUCCESS; + } - auto ext_info_host_buf = ext_info_addr_host_.get(); + for (auto i = 0; i < node_item_->num_inputs; ++i) { + auto input_desc = node_item_->op_desc->MutableInputDesc(i); + GE_CHECK_NOTNULL(input_desc); + GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateInputShapeAndType(i, *input_desc), + "Node[%s] input[%d] update input shape failed.", node_name_.c_str(), i); + } - auto ext_info_type = 
reinterpret_cast(ext_info_host_buf); - ext_info_type->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE; - ext_info_type->infoLen = sizeof(uint32_t); - ext_info_type->infoAddr = ext_info_dev_base + ext_info_type_offset; - // set unknown shape type - auto unkonw_shape_type_addr = reinterpret_cast(ext_info_host_buf + ext_info_type_offset); - *unkonw_shape_type_addr = unknown_type_; + if (unknown_type_ != DEPEND_COMPUTE) { + for (auto j = 0; j < node_item_->num_outputs; ++j) { + auto output_desc = node_item_->op_desc->MutableOutputDesc(j); + GE_CHECK_NOTNULL(output_desc); - if (input_num_ > 0) { - auto ext_info_input = reinterpret_cast(ext_info_host_buf + sizeof(ExtInfo)); - ext_info_input->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_INPUT_SHAPE; - ext_info_input->infoLen = input_num_ * sizeof(MaxShape); - ext_info_input->infoAddr = ext_info_dev_base + ext_info_input_shape_offset; + GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateOutputShapeAndType(j, *output_desc), + "Node[%s] output[%d] UpdateOutputShapeAndType failed.", node_name_.c_str(), j); + } } - if ((unknown_type_ != DEPEND_COMPUTE) && (output_num_ > 0)) { - size_t ext_info_output_shape_offset = ext_info_input_shape_offset + input_num_ * sizeof(MaxShape); - auto ext_info_output = reinterpret_cast(ext_info_host_buf + (ext_info_num_ - 1) * sizeof(ExtInfo)); - ext_info_output->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE; - ext_info_output->infoLen = output_num_ * sizeof(MaxShape); - ext_info_output->infoAddr = ext_info_dev_base + ext_info_output_shape_offset; + + // copy input and output shapes to device + GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), aicpu_ext_handle_.GetExtInfo(), + aicpu_ext_handle_.GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE)); + + GELOGI("Node[%s] update ext info end.", node_name_.c_str()); + return SUCCESS; +} + +Status AicpuNodeTaskBase::UpdateArgs(TaskContext &context) { + GELOGI("Node[%s] update args begin. 
unknown_type=%d", node_name_.c_str(), unknown_type_); + if (node_item_->num_inputs == 0 && node_item_->num_outputs == 0) { + GELOGI("Node[%s] has no input and output, no need update args.", node_name_.c_str()); + return SUCCESS; } - GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), ext_info_host_buf, ext_info_size, - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_STATUS_RET(UpdateIoAddr(context), "Node[%s] update io addr failed.", node_name_.c_str()); + + GE_CHK_STATUS_RET(UpdateExtInfo(), "Node[%s] update ext info failed.", node_name_.c_str()); + + GELOGI("Node[%s] update args end.", node_name_.c_str()); + return SUCCESS; +} + +Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function done_callback) { + GELOGI("Node[%s] execute async start. unknown_type=%d.", node_name_.c_str(), unknown_type_); + + GE_CHK_STATUS_RET(LaunchTask(context)); + + auto callback = [=, &context]() { + GELOGI("Node[%s] callback start.", node_name_.c_str()); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start"); + Status callback_ret = TaskCallback(context); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] End"); + + GELOGI("Node[%s] task callBack ret = %u.", node_name_.c_str(), callback_ret); + if (done_callback != nullptr) { + context.SetStatus(callback_ret); + done_callback(); + } + + GELOGI("Node[%s] callback end.", node_name_.c_str()); + }; + + GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(callback)); + + GELOGI("Node[%s] execute async end.", node_name_.c_str()); return SUCCESS; } Status AicpuTfNodeTask::InitForDependComputeTask() { - if ((unknown_type_ != DEPEND_COMPUTE) || (output_num_ == 0)) { - GELOGI("node %s type %s unknown_type is %d, output num is %zu.", node_->GetName().c_str(), node_->GetType().c_str(), - unknown_type_, output_num_); + if ((unknown_type_ != DEPEND_COMPUTE) || (node_item_->num_outputs == 0)) { + GELOGI("Node[%s] type[%s] 
unknown_type is %d, output num is %d.", node_name_.c_str(), node_item_->node_type.c_str(), + unknown_type_, node_item_->num_outputs); return SUCCESS; } - output_summary_.resize(output_num_); + output_summary_.resize(node_item_->num_outputs); constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary); - for (size_t i = 0; i < output_num_; ++i) { + for (auto i = 0; i < node_item_->num_outputs; ++i) { GE_CHK_STATUS_RET(AllocTensorBuffer(result_summary_size, output_summary_[i]), - "Node %s alloc buffer for ext info failed, size=%zu.", node_->GetName().c_str(), + "Node[%s] alloc buffer for result summary info failed, size=%zu.", node_name_.c_str(), result_summary_size); } - output_summary_host_.resize(output_num_); + output_summary_host_.resize(node_item_->num_outputs); // init for mem copy task // copy task need copy output_data and output_shape, max len is 2 * output_num - const size_t copy_input_buf_len = output_num_ * 2 * sizeof(uint64_t); + const size_t copy_input_buf_len = node_item_->num_outputs * 2 * sizeof(uint64_t); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_release_flag_dev_), - "Node %s alloc copy task input release_flag failed, size=%zu", node_->GetName().c_str(), + "Node[%s] alloc copy task input release_flag failed, size=%zu", node_name_.c_str(), copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_data_size_dev_), - "Node %s alloc copy task input data_size failed, size=%zu", node_->GetName().c_str(), + "Node[%s] alloc copy task input data_size failed, size=%zu", node_name_.c_str(), copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_src_dev_), - "Node %s alloc copy task input src failed, size=%zu", node_->GetName().c_str(), copy_input_buf_len); + "Node[%s] alloc copy task input src failed, size=%zu", node_name_.c_str(), copy_input_buf_len); GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_dst_dev_), - "Node %s alloc copy 
task input dst failed, size=%zu", node_->GetName().c_str(), copy_input_buf_len); + "Node[%s] alloc copy task input dst failed, size=%zu", node_name_.c_str(), copy_input_buf_len); // copy task args buf GE_CHK_STATUS_RET(AllocTensorBuffer(sizeof(STR_FWK_OP_KERNEL), copy_task_args_buf_), - "Node %s alloc copy task args buf failed, size=%zu", node_->GetName().c_str(), + "Node[%s] alloc copy task args buf failed, size=%zu", node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL)); std::vector copy_io_addr; @@ -150,7 +225,7 @@ Status AicpuTfNodeTask::InitForDependComputeTask() { // can alloc in init, it can reuse GE_CHK_STATUS_RET(AllocTensorBuffer(copy_io_addr_size, copy_ioaddr_dev_), - "Node %s alloc copy task io buf failed, size=%zu", node_->GetName().c_str(), copy_io_addr_size); + "Node[%s] alloc copy task io buf failed, size=%zu", node_name_.c_str(), copy_io_addr_size); GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size, ©_io_addr[0], copy_io_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); @@ -158,155 +233,107 @@ Status AicpuTfNodeTask::InitForDependComputeTask() { } Status AicpuTfNodeTask::Init(const HybridModel &model) { - auto node_name = node_->GetName(); - GELOGI("AicpuTfNodeTask[%s] Init Start.", node_name.c_str()); - auto op_desc = node_->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); + GELOGI("Node[%s] init start.", node_name_.c_str()); - const auto node_item = model.GetNodeItem(node_); - GE_CHECK_NOTNULL(node_item); - unknown_type_ = node_item->shape_inference_type; + GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel_ex(), FAILED, "Node[%s] is tf node but task def does not has kernel ex.", + node_name_.c_str()); auto &kernel_ex_def = task_def_.kernel_ex(); - - auto kernel_workspace_size = static_cast(kernel_ex_def.task_info_size()); + auto kernel_workspace_size = kernel_ex_def.task_info().size(); GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_workspace_size, kernel_workspace_), - "Node %s alloc buffer for kernel workspace failed, size=%zu.", node_name.c_str(), + "Node[%s] 
alloc buffer for kernel workspace failed, size=%zu.", node_name_.c_str(), kernel_workspace_size); GE_CHK_RT_RET(rtMemcpy(kernel_workspace_->GetData(), kernel_workspace_size, kernel_ex_def.task_info().data(), - static_cast(kernel_ex_def.task_info_size()), RT_MEMCPY_HOST_TO_DEVICE)); - input_num_ = op_desc->GetInputsSize(); - output_num_ = op_desc->GetOutputsSize(); - size_t input_output_size = (input_num_ + output_num_) * sizeof(uint64_t); - if (input_output_size > 0) { - // alloc input output addr buf - GE_CHK_STATUS_RET(AllocTensorBuffer(input_output_size, input_output_addr_), - "Node %s alloc buffer for input output addr failed, size=%zu.", node_name.c_str(), - input_output_size); - } + kernel_workspace_size, RT_MEMCPY_HOST_TO_DEVICE)); + + auto input_output_size = (node_item_->num_inputs + node_item_->num_outputs) * sizeof(uint64_t); + // alloc input output addr buf, allow alloc size 0 + GE_CHK_STATUS_RET(AllocTensorBuffer(input_output_size, input_output_addr_), + "Node[%s] alloc buffer for io addr failed, size=%zu.", node_name_.c_str(), input_output_size); + + auto &kernel_ext_info = kernel_ex_def.kernel_ext_info(); + auto kernel_ext_info_size = kernel_ex_def.kernel_ext_info_size(); + GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", node_name_.c_str(), + kernel_ext_info.size(), kernel_ext_info_size); // init ext info - GE_CHK_STATUS_RET(InitExtInfo(), "Task %s init ext info failed.", node_name.c_str()); - GE_CHK_STATUS_RET(InitForDependComputeTask(), "Task %s init for depend compute task failed.", node_name.c_str()); + GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name_.c_str()); + GE_CHK_STATUS_RET(InitForDependComputeTask(), "Node[%s] init for depend compute task failed.", node_name_.c_str()); // build fwk_op_kernel. 
- GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(sizeof(STR_FWK_OP_KERNEL) < kernel_ex_def.args_size(), return FAILED, - "sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u", sizeof(STR_FWK_OP_KERNEL), - kernel_ex_def.args_size()); + GE_CHK_BOOL_RET_STATUS(sizeof(STR_FWK_OP_KERNEL) >= kernel_ex_def.args_size(), FAILED, + "Node[%s] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u", node_name_.c_str(), + sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args_size()); STR_FWK_OP_KERNEL fwk_op_kernel = {0}; - errno_t sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), - static_cast(kernel_ex_def.args_size())); - GE_CHK_BOOL_EXEC(sec_ret == EOK, return INTERNAL_ERROR, "memcpy fwk_op_kernel failed, ret: %d", sec_ret); + errno_t sec_ret = + memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args().data(), kernel_ex_def.args_size()); + GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, "Node[%s] memcpy fwk_op_kernel failed, ret: %d.", + node_name_.c_str(), sec_ret); fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_->GetData()); fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(input_output_addr_->GetData()); // set ext info addr and ext info num fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoAddr = reinterpret_cast(ext_info_addr_dev_->GetData()); - fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoNum = ext_info_num_; + fwk_op_kernel.fwkKernelBase.fwk_kernel.extInfoLen = ext_info_addr_dev_->GetSize(); - // get step_id_addr - auto var_tensor = model.GetVariable(NODE_NAME_GLOBAL_STEP); - uint64_t step_id_addr = 0; - if (var_tensor != nullptr) { - step_id_addr = reinterpret_cast(var_tensor->GetData()); - } - - fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = step_id_addr; + fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = GetStepIdAddr(model); auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID; - GE_CHK_STATUS_RET(EnsureSessionCreated(session_id), 
"session id %lu create failed.", session_id); + GE_CHK_STATUS_RET(EnsureSessionCreated(session_id), "Node[%s] create session id %lu failed.", node_name_.c_str(), + session_id); // alloc kernel_buf_ and copy to device. GE_CHK_STATUS_RET(AllocTensorBuffer(sizeof(STR_FWK_OP_KERNEL), kernel_buf_), - "Node %s alloc buffer for kernel buf failed, size=%zu.", node_name.c_str(), + "Node[%s] alloc buffer for kernel buf failed, size=%zu.", node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL)); GE_CHK_RT_RET(rtMemcpy(kernel_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); - GELOGI("AicpuTfNodeTask[%s] init end.", node_name.c_str()); - return SUCCESS; -} - -Status AicpuTfNodeTask::EnsureSessionCreated(uint64_t session_id) { - auto model_manager = ModelManager::GetInstance(); - GE_CHECK_NOTNULL(model_manager); - GE_CHK_STATUS_RET(model_manager->CreateAicpuSession(session_id), "Create aicpu session %u failed", session_id); + GELOGI("Node[%s] init end.", node_name_.c_str()); return SUCCESS; } -Status AicpuTfNodeTask::SetShapeToBuf(const GeShape &shape, int64_t buf[], uint32_t buf_size) { - auto node_name = node_->GetName(); - uint32_t index = 0; - int64_t shape_size = shape.GetDimNum(); - if (shape_size > buf_size) { - GELOGI("SetShapeToBuf[%s] failed, as shape size %ld is over %u.", node_name.c_str(), shape_size, buf_size); - return PARAM_INVALID; - } - for (; index < shape_size; ++index) { - buf[index] = shape.GetDim(index); - } - if (index < buf_size) { - buf[index] = kDimEndFlag; +uint64_t AicpuTfNodeTask::GetStepIdAddr(const HybridModel &model) { + // get step_id_addr + auto var_tensor = model.GetVariable(NODE_NAME_GLOBAL_STEP); + uint64_t step_id_addr = 0; + if (var_tensor != nullptr) { + step_id_addr = reinterpret_cast(var_tensor->GetData()); } - return SUCCESS; + return step_id_addr; } -Status AicpuTfNodeTask::UpdateShapeToOutputDesc(const GeShape &shape_new, size_t output_index, - GeTensorDescPtr &output_desc) { - 
auto node_name = node_->GetName(); - auto shape_old = output_desc->GetShape(); - output_desc->SetShape(shape_new); - GELOGI("Update node[%s] out[%zu] shape from %s to %s.", node_name.c_str(), output_index, shape_old.ToString().c_str(), - shape_new.ToString().c_str()); - - auto origin_shape_old = output_desc->GetOriginShape(); - auto origin_format = output_desc->GetOriginFormat(); - auto format = output_desc->GetFormat(); - if (origin_format == format) { - output_desc->SetOriginShape(shape_new); - return SUCCESS; - } - // if format is not same need convert shape - std::vector origin_dims_new; - auto trans_ret = - formats::TransShape(format, shape_new.GetDims(), output_desc->GetDataType(), origin_format, origin_dims_new); - GE_CHK_STATUS_RET(trans_ret, - "Node[%s] out[%zu] originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.", - node_name.c_str(), output_index, origin_format, format, shape_new.ToString().c_str()); - auto origin_shape_new = GeShape(origin_dims_new); - output_desc->SetOriginShape(origin_shape_new); - GELOGI("Node[%s] out[%zu] originFormat[%d] is not same as format[%d], need update from %s ro %s.", node_name.c_str(), - output_index, origin_format, format, origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); +Status AicpuTfNodeTask::EnsureSessionCreated(uint64_t session_id) { + auto model_manager = ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + GE_CHK_STATUS_RET(model_manager->CreateAicpuSession(session_id), "Create aicpu session %lu failed", session_id); return SUCCESS; } Status AicpuTfNodeTask::ReadResultSummaryAndPrepareMemory(TaskContext &context, std::vector> &out_shape_hbm) { - for (size_t i = 0; i < output_num_; ++i) { + for (auto i = 0; i < node_item_->num_outputs; ++i) { auto &result_summary = output_summary_host_[i]; GE_CHK_RT_RET(rtMemcpy(&result_summary, sizeof(aicpu::FWKAdapter::ResultSummary), output_summary_[i]->GetData(), output_summary_[i]->GetSize(), 
RT_MEMCPY_DEVICE_TO_HOST)); - GELOGI( - "Node[%s] out[%zu] result summary addr=%p," - " shape_data_ptr=0x%lx, shape_data_size=%lu, raw_data_ptr=0x%lx, raw_data_size=%lu.", - node_->GetName().c_str(), i, output_summary_[i]->GetData(), result_summary.shape_data_ptr, - result_summary.shape_data_size, result_summary.raw_data_ptr, result_summary.raw_data_size); - auto raw_data_size = result_summary.raw_data_size; std::unique_ptr tensor_buffer; - GE_CHK_STATUS_RET(AllocTensorBuffer(raw_data_size, tensor_buffer), "alloc tensor buffer failed, raw_data_size=%lu", + GE_CHK_STATUS_RET(AllocTensorBuffer(raw_data_size, tensor_buffer), + "Node[%s] out[%d] alloc tensor buffer failed, raw_data_size=%lu", node_name_.c_str(), i, raw_data_size); auto status = context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release()))); - GE_CHK_STATUS_RET(status, "SetOutput %zu failed.", i); + GE_CHK_STATUS_RET(status, "Node[%s] set output %d failed.", node_name_.c_str(), i); auto shape_data_size = result_summary.shape_data_size; std::unique_ptr shape_buffer; GE_CHK_STATUS_RET(AllocTensorBuffer(shape_data_size, shape_buffer), - "alloc shape buffer failed, shape_data_size=%lu", shape_data_size); + "Node[%s] out[%d] alloc shape buffer failed, shape_data_size=%lu", node_name_.c_str(), i, + shape_data_size); out_shape_hbm.emplace_back(std::move(shape_buffer)); } return SUCCESS; @@ -314,19 +341,56 @@ Status AicpuTfNodeTask::ReadResultSummaryAndPrepareMemory(TaskContext &context, Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, const std::vector> &out_shape_hbm) { - GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == output_num_, INTERNAL_ERROR, - "Node %s has %zu outputs but out shape is %zu", node_->GetName().c_str(), output_num_, + GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, + "Node[%s] has %d outputs but out shape is %zu.", node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size()); + uint64_t copy_num = 0; + 
GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm, copy_num)); + + STR_FWK_OP_KERNEL aicpu_task = {0}; + std::string task_info; + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] Start"); + GE_CHK_STATUS_RET_NOLOG(GenMemCopyTask(copy_num, aicpu_task, task_info)); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[GenMemCopyTask] End"); + + std::unique_ptr kernel_workspace_buf; + GE_CHK_STATUS_RET(AllocTensorBuffer(task_info.size(), kernel_workspace_buf), + "Node[%s] alloc copy task workspace buf failed, size=%zu.", node_name_.c_str(), task_info.size()); + + GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(), task_info.data(), task_info.size(), + RT_MEMCPY_HOST_TO_DEVICE)); + + aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData()); + aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_buf->GetData()); + aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; + aicpu_task.fwkKernelBase.fwk_kernel.extInfoLen = 0; + + GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &aicpu_task, + sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); + + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); + GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, + context.GetStream())); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); + + GE_CHK_RT_RET(rtStreamSynchronize(context.GetStream())); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[SynchronizeCopy] End"); + return SUCCESS; +} + +Status AicpuTfNodeTask::PrepareCopyInputs(const TaskContext &context, + const std::vector> &out_shape_hbm, + uint64_t ©_num) { std::vector copy_input_release_flag; std::vector copy_input_data_size; std::vector 
copy_input_src; std::vector copy_input_dst; - for (size_t i = 0; i < output_num_; ++i) { + for (auto i = 0; i < node_item_->num_outputs; ++i) { const auto &summary = output_summary_host_[i]; - GELOGI("node[%s] [%zu]th output summary, shape data=%lx, shape data size=%lu, raw data=%lx, raw data size=%lu.", - node_->GetName().c_str(), i, summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, + GELOGI("Node[%s] out[%d] summary, shape data=0x%lx, shape data size=%lu, raw data=0x%lx, raw data size=%lu.", + node_name_.c_str(), i, summary.shape_data_ptr, summary.shape_data_size, summary.raw_data_ptr, summary.raw_data_size); if (summary.raw_data_size > 0) { auto output = context.GetOutput(i); @@ -349,15 +413,9 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, } } - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(copy_input_release_flag.empty(), return INTERNAL_ERROR, "Node %s need copy num is 0", - node_->GetName().c_str()); + copy_num = copy_input_release_flag.size(); - auto copy_num = copy_input_release_flag.size(); - STR_FWK_OP_KERNEL aicpu_task = {0}; - std::string task_info; - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), "[GenMemCopyTask] Start"); - GE_CHK_STATUS_RET_NOLOG(GenMemCopyTask(copy_num, aicpu_task, task_info)); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), "[GenMemCopyTask] End"); + GE_CHK_BOOL_RET_STATUS(copy_num > 0, INTERNAL_ERROR, "Node[%s] need copy num is 0", node_name_.c_str()); // copy task need copy output and output shape const size_t copy_input_buf_len = copy_num * sizeof(uint64_t); @@ -370,271 +428,255 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT_RET(rtMemcpy(copy_input_dst_dev_->GetData(), copy_input_dst_dev_->GetSize(), ©_input_dst[0], copy_input_buf_len, RT_MEMCPY_HOST_TO_DEVICE)); - - std::unique_ptr kernel_workspace_buf; - GE_CHK_STATUS_RET(AllocTensorBuffer(task_info.size(), 
kernel_workspace_buf), - "Node %s alloc copy task workspace buf failed, size=%zu", node_->GetName().c_str(), - task_info.size()); - - GE_CHK_RT_RET(rtMemcpy(kernel_workspace_buf->GetData(), task_info.size(), task_info.data(), task_info.size(), - RT_MEMCPY_HOST_TO_DEVICE)); - - aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_->GetData()); - aicpu_task.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast(kernel_workspace_buf->GetData()); - aicpu_task.fwkKernelBase.fwk_kernel.extInfoAddr = 0; - aicpu_task.fwkKernelBase.fwk_kernel.extInfoNum = 0; - - GE_CHK_RT_RET(rtMemcpy(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), &aicpu_task, - sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE)); - - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), "[LaunchCopy] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, - context.GetStream())); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), "[LaunchCopy] End"); - - GE_CHK_RT_RET(rtStreamSynchronize(context.GetStream())); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), "[SynchronizeCopy] End"); return SUCCESS; } -Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &task, string &task_info) { +Status AicpuTfNodeTask::GenMemCopyTask(uint64_t copy_num, STR_FWK_OP_KERNEL &task, std::string &task_info) { auto instance_ptr = ge::GELib::GetInstance(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(instance_ptr == nullptr || !instance_ptr->InitFlag(), return GE_CLI_GE_NOT_INITIALIZED, - "GE is not initialized"); + GE_CHK_BOOL_RET_STATUS(instance_ptr != nullptr && instance_ptr->InitFlag(), GE_CLI_GE_NOT_INITIALIZED, + "GE is not initialized"); static constexpr const char *const kKernelLibName = "aicpu_kernel"; OpsKernelInfoStorePtr kernel_info = 
instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(kKernelLibName); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(kernel_info == nullptr, return FAILED, "Get op kernel info store failed"); + GE_CHK_BOOL_RET_STATUS(kernel_info != nullptr, FAILED, "Get op kernel info store[%s] failed", kKernelLibName); auto ret = kernel_info->GenMemCopyTask(copy_num, task, task_info); - GE_CHK_STATUS_RET(ret, "call aicpu GenMemCopyTask failed, copy_num=%lu, ret=%u", copy_num, ret); + GE_CHK_STATUS_RET(ret, "Call aicpu GenMemCopyTask failed, copy_num=%lu, ret=%u", copy_num, ret); return SUCCESS; } Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context, const std::vector> &out_shape_hbm) { - GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == output_num_, INTERNAL_ERROR, - "Node %s has %zu outputs but out shape is %zu", node_->GetName().c_str(), output_num_, + GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, + "Node[%s] has %d outputs but out shape is %zu", node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size()); - auto op_desc = node_->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - for (size_t i = 0; i < output_num_; ++i) { + for (auto i = 0; i < node_item_->num_outputs; ++i) { const auto &result_summary = output_summary_host_[i]; - auto output_desc = op_desc->MutableOutputDesc(i); + auto output_desc = node_item_->op_desc->MutableOutputDesc(i); std::vector shape_dims; if (result_summary.shape_data_size > 0) { const auto &shape_hbm = out_shape_hbm[i]; GE_CHK_BOOL_RET_STATUS((result_summary.shape_data_size % sizeof(int64_t) == 0), INTERNAL_ERROR, - "node %s %zuth output shape data size is %lu is not divided by int64_t.", - node_->GetName().c_str(), i, result_summary.shape_data_size); + "Node[%s] [%d]th output shape data size is %lu is not divided by int64_t.", + node_name_.c_str(), i, result_summary.shape_data_size); uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t); - GELOGI("node %s %zuth output dim 
num=%lu.", node_->GetName().c_str(), i, dim_num); + GELOGI("Node[%s] [%d]th output dim num=%u.", node_name_.c_str(), i, dim_num); std::unique_ptr shape_addr(new (std::nothrow) int64_t[dim_num]()); GE_CHECK_NOTNULL(shape_addr); GE_CHK_RT_RET(rtMemcpy(shape_addr.get(), result_summary.shape_data_size, shape_hbm->GetData(), shape_hbm->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); for (uint32_t dim_idx = 0; dim_idx < dim_num; ++dim_idx) { shape_dims.emplace_back(shape_addr[dim_idx]); - GELOGD("node %s %zuth output dim[%u]=%lu.", node_->GetName().c_str(), i, dim_idx, shape_addr[dim_idx]); + GELOGD("Node[%s] [%d]th output dim[%u]=%ld.", node_name_.c_str(), i, dim_idx, shape_addr[dim_idx]); } } GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(shape_dims), i, output_desc), - "update node %s %uth output shape failed.", node_->GetName().c_str(), i); + "Node[%s] update [%d]th output shape failed.", node_name_.c_str(), i); } return SUCCESS; } -Status AicpuTfNodeTask::UpdateOutputShapeFromExtInfo() { - auto node_name = node_->GetName(); - if (output_num_ == 0) { - GELOGI("Task [%s] output_num is 0, no need reset output shape.", node_name.c_str()); - return SUCCESS; - } - - auto ext_output_shape_offset = ext_info_num_ * sizeof(ExtInfo) + sizeof(uint32_t) + input_num_ * sizeof(MaxShape); - size_t ext_info_output_shape_len = output_num_ * sizeof(MaxShape); - auto output_shape_host_buf = ext_info_addr_host_.get() + ext_output_shape_offset; - auto output_shape_dev_buf = reinterpret_cast(ext_info_addr_dev_->GetData()) + ext_output_shape_offset; - - GE_CHK_RT_RET(rtMemcpy(output_shape_host_buf, ext_info_output_shape_len, output_shape_dev_buf, - ext_info_output_shape_len, RT_MEMCPY_DEVICE_TO_HOST)); - - auto op_desc = node_->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - auto shapeBuf = reinterpret_cast(output_shape_host_buf); - for (uint32_t i = 0; i < output_num_; ++i) { - std::vector dims; - GetShapeFromBuf(shapeBuf + i * kMaxDimCount, kMaxDimCount, dims); - auto output_desc = 
op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL(output_desc); - GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(GeShape(dims), i, output_desc), - "update node %s %uth output shape failed.", node_name.c_str(), i); - } - - return SUCCESS; -} - Status AicpuTfNodeTask::UpdateShapeAndDataByResultSummary(TaskContext &context) { - GELOGI("Task [%s] update shape and data by result summary begin.", node_->GetName().c_str()); + GELOGI("Node[%s] update shape and data by result summary begin.", node_name_.c_str()); std::vector> out_shape_hbm; GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(context, out_shape_hbm), - "node %s read ResultSummary and update output shape failed.", node_->GetName().c_str()); + "Node[%s] read ResultSummary and update output shape failed.", node_name_.c_str()); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), - "[ReadResultSummaryAndPrepareMemory] End"); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[ReadResultSummaryAndPrepareMemory] End"); - GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm), "node %s copy data to output failed.", - node_->GetName().c_str()); + GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm), "Node[%s] copy data to output failed.", node_name_.c_str()); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), "[CopyDataToHbm] End"); + RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[CopyDataToHbm] End"); - GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(context, out_shape_hbm), "node %s update shape by hbm buffer failed.", - node_->GetName().c_str()); + GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(context, out_shape_hbm), "Node[%s] update shape by hbm buffer failed.", + node_name_.c_str()); - GELOGI("Task [%s] update shape and data by result summary end.", node_->GetName().c_str()); + GELOGI("Node[%s] update shape and data by result summary end.", node_name_.c_str()); return SUCCESS; } -void 
AicpuTfNodeTask::GetShapeFromBuf(const int64_t buf[], uint32_t buf_size, std::vector &dims) { - for (uint32_t index = 0; index < buf_size; ++index) { - auto tmpDim = buf[index]; - if (tmpDim == kDimEndFlag) { - break; - } - dims.emplace_back(tmpDim); - } -} - -Status AicpuTfNodeTask::UpdateArgs(TaskContext &context) { - auto node_name = node_->GetName(); - GELOGI("AicpuTfNodeTask[%s] UpdateArgs begin. unknown_type=%d", node_name.c_str(), unknown_type_); - auto op_desc = node_->GetOpDesc(); - auto io_nums = input_num_ + output_num_; - if (io_nums == 0) { - GELOGI("Node %s has no input and output, no need update args.", node_name.c_str()); - return SUCCESS; - } - - vector io_addrs(io_nums, 0UL); - size_t ext_shape_nums = (unknown_type_ == DEPEND_COMPUTE) ? input_num_ : io_nums; - vector io_shapes(ext_shape_nums); - - uint32_t index = 0; - for (size_t i = 0; i < input_num_; ++i, ++index) { +Status AicpuTfNodeTask::UpdateIoAddr(TaskContext &context) { + vector io_addrs; + io_addrs.reserve(node_item_->num_inputs + node_item_->num_outputs); + for (auto i = 0; i < node_item_->num_inputs; ++i) { auto inputData = context.GetInput(i); GE_CHECK_NOTNULL(inputData); - auto input_desc = op_desc->MutableInputDesc(i); - GE_CHECK_NOTNULL(input_desc); - auto &shape = input_desc->MutableShape(); - - GELOGD("io_addr[%u] = %p, size = %zu", index, inputData->GetData(), inputData->GetSize()); - io_addrs[index] = reinterpret_cast(inputData->GetData()); - GE_CHK_STATUS_RET(SetShapeToBuf(shape, io_shapes[index].dims, kMaxDimCount), - "task %s input[%zu] SetShapeToBuf failed.", node_name.c_str(), i); + GELOGD("Node[%s] input[%d] addr = %p, size = %zu", node_name_.c_str(), i, inputData->GetData(), + inputData->GetSize()); + io_addrs.emplace_back(reinterpret_cast(inputData->GetData())); } if (unknown_type_ != DEPEND_COMPUTE) { // unknown type 4 do this in call back. 
GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); - for (size_t j = 0; j < output_num_; ++j, ++index) { + for (auto j = 0; j < node_item_->num_outputs; ++j) { auto outputData = context.GetOutput(j); GE_CHECK_NOTNULL(outputData); - auto output_desc = op_desc->MutableOutputDesc(j); - GE_CHECK_NOTNULL(output_desc); - auto shape = output_desc->GetShape(); - - // shape range need use range update shape - if (unknown_type_ == DEPEND_SHAPE_RANGE) { - std::vector> range; - auto range_ret = output_desc->GetShapeRange(range); - GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR, - "node %s has is shape range but get GetShapeRange failed, ret=%u.", node_name.c_str(), - range_ret); - for (size_t k = 0; k < range.size(); ++k) { - if (shape.GetDim(k) < 0 && k < range.size()) { - GELOGD("node %s output[%zu] update dim[%zu] from %lu to range max %lu.", node_name.c_str(), j, k, - shape.GetDim(k), range[k].second); - shape.SetDim(k, range[k].second); - } - } - } - GELOGD("io_addr[%u] = %p, size = %zu", index, outputData->GetData(), outputData->GetSize()); - io_addrs[index] = reinterpret_cast(outputData->GetData()); - GE_CHK_STATUS_RET(SetShapeToBuf(shape, io_shapes[index].dims, kMaxDimCount), - "task %s output[%zu] SetShapeToBuf failed.", node_name.c_str(), j); + GELOGD("Node[%s] output[%d] addr = %p, size = %zu", node_name_.c_str(), j, outputData->GetData(), + outputData->GetSize()); + io_addrs.emplace_back(reinterpret_cast(outputData->GetData())); } } else { // unknown type 4 use result summary update ioaddr. 
- GELOGI("AicpuTfNodeTask[%s] is unknown-shape, use ResultSummary as out-addr.", node_name.c_str()); - GE_CHK_BOOL_RET_STATUS(output_summary_.size() == output_num_, INTERNAL_ERROR, - "node %s has %zu output but %zu output summary.", node_name.c_str(), output_num_, - output_summary_.size()); + GELOGI("Node[%s] is depend compute node, use result summary as out addr.", node_name_.c_str()); + GE_CHK_BOOL_RET_STATUS(output_summary_.size() == static_cast(node_item_->num_outputs), INTERNAL_ERROR, + "Node[%s] has %d output but %zu output summary.", node_name_.c_str(), + node_item_->num_outputs, output_summary_.size()); - for (size_t j = 0; j < output_num_; ++j, ++index) { + for (auto j = 0; j < node_item_->num_outputs; ++j) { void *summary_addr = output_summary_[j]->GetData(); - io_addrs[index] = reinterpret_cast(summary_addr); + io_addrs.emplace_back(reinterpret_cast(summary_addr)); } } // if has input and output, need copy to ioaddr - if (io_nums > 0) { + if (!io_addrs.empty()) { // copy input and output to device GE_CHK_RT_RET(rtMemcpy(input_output_addr_->GetData(), input_output_addr_->GetSize(), &io_addrs[0], sizeof(uint64_t) * io_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE)); } - - // if has shape ext info, need copy to ext addr - if (ext_shape_nums > 0) { - uint32_t offset = ext_info_num_ * sizeof(ExtInfo) + sizeof(uint32_t); - uint32_t len = sizeof(MaxShape) * ext_shape_nums; - auto ext_addr_dev_base = reinterpret_cast(ext_info_addr_dev_->GetData()) + offset; - // copy input and output shapes to device - GE_CHK_RT_RET(rtMemcpy(ext_addr_dev_base, ext_info_addr_dev_->GetSize() - offset, &io_shapes[0], len, - RT_MEMCPY_HOST_TO_DEVICE)); - } - - GELOGI("AicpuTfNodeTask[%s] UpdateArgs end.", node_name.c_str()); return SUCCESS; } -Status AicpuTfNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { - auto node_name = node_->GetName(); - GELOGI("AicpuTfNodeTask[%s] ExecuteAsync Start. 
unknown_type=%d.", node_name.c_str(), unknown_type_); - +Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { + GELOGI("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_); uint32_t flag = RT_KERNEL_DEFAULT; GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream())); + GELOGI("Node[%s] launch end.", node_name_.c_str()); + return SUCCESS; +} - auto callback = [=, &context]() { - GELOGI("AicpuTfNodeTask[%s] callback start.", node_->GetName().c_str()); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), "[TaskCallback] Start"); - Status callback_ret = SUCCESS; - // check need update shape, call update shape. - if (unknown_type_ == DEPEND_SHAPE_RANGE) { - // check result - callback_ret = UpdateOutputShapeFromExtInfo(); - } else if (unknown_type_ == DEPEND_COMPUTE) { - callback_ret = UpdateShapeAndDataByResultSummary(context); - } +Status AicpuTfNodeTask::TaskCallback(TaskContext &context) { + GELOGI("Node[%s] task callback start. unknown_type=%d.", node_name_.c_str(), unknown_type_); + Status callback_ret = SUCCESS; + // check need update shape, call update shape. 
+ if (unknown_type_ == DEPEND_SHAPE_RANGE) { + // check result + callback_ret = UpdateOutputShapeFromExtInfo(); + } else if (unknown_type_ == DEPEND_COMPUTE) { + callback_ret = UpdateShapeAndDataByResultSummary(context); + } + GELOGI("Node[%s] task callback end.", node_name_.c_str()); + return callback_ret; +} - GELOGI("AicpuTfNodeTask[%s] refresh output complete, ret = %d.", node_->GetName().c_str(), callback_ret); - RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_->GetName().c_str(), "[TaskCallback] End"); +Status AicpuNodeTask::Init(const HybridModel &model) { + auto node_name = node_name_; + GELOGI("Node[%s] init start.", node_name.c_str()); - if (done_callback != nullptr) { - context.SetStatus(callback_ret); - done_callback(); - } + GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED, + "Node[%s] unknown type[%d] is depend compute, it's not supported now.", node_name.c_str(), + unknown_type_); - GELOGI("AicpuTfNodeTask[%s] callback end.", node_->GetName().c_str()); - }; + GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel(), FAILED, "Node[%s] task def does not has kernel.", node_name.c_str()); + auto &kernel_def = task_def_.kernel(); - GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(callback)); + auto &args = kernel_def.args(); + args_size_ = kernel_def.args_size(); + + GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, "Node[%s] task def args.size=%zu, but args_size=%u.", + node_name.c_str(), args.size(), args_size_); + + GE_CHK_BOOL_RET_STATUS(args_size_ >= sizeof(aicpu::AicpuParamHead), FAILED, + "Node[%s] task def args_size=%u is less than aicpu param head len=%zu.", node_name.c_str(), + args_size_, sizeof(aicpu::AicpuParamHead)); + + args_.reset(new (std::nothrow) uint8_t[args_size_]()); + GE_CHK_BOOL_RET_STATUS(args_ != nullptr, FAILED, "Node[%s] malloc args mem failed, args_size_=%u.", node_name.c_str(), + args_size_); + + errno_t sec_ret = memcpy_s(args_.get(), args_size_, args.c_str(), args.size()); + 
GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR, "Node[%s] copy args failed, ret: %d", node_name_.c_str(), + sec_ret); - GELOGI("AicpuTfNodeTask[%s] ExecuteAsync end.", node_name.c_str()); + auto aicpu_param_head = reinterpret_cast(args_.get()); + auto io_num = node_item_->num_inputs + node_item_->num_outputs; + + // check AicpuParamHead ioAddrNum is right. + GE_CHK_BOOL_RET_STATUS((aicpu_param_head->ioAddrNum == static_cast(io_num)), PARAM_INVALID, + "Node[%s] param head ioAddrNum=%u, but node has %d inputs and %d outputs.", node_name.c_str(), + aicpu_param_head->ioAddrNum, node_item_->num_inputs, node_item_->num_outputs); + + auto mini_len = sizeof(aicpu::AicpuParamHead) + io_num * sizeof(uint64_t); + // check args len must over mini len. + GE_CHK_BOOL_RET_STATUS((mini_len <= aicpu_param_head->length), PARAM_INVALID, + "Node[%s] param head length=%u, but min len need %zu.", node_name.c_str(), + aicpu_param_head->length, mini_len); + + auto &kernel_ext_info = kernel_def.kernel_ext_info(); + auto kernel_ext_info_size = kernel_def.kernel_ext_info_size(); + GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + "Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", node_name.c_str(), + kernel_ext_info.size(), kernel_ext_info_size); + + GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name.c_str()); + + aicpu_param_head->extInfoLength = ext_info_addr_dev_->GetSize(); + aicpu_param_head->extInfoAddr = reinterpret_cast(ext_info_addr_dev_->GetData()); + + GELOGI("Node[%s] init end.", node_name.c_str()); return SUCCESS; } +Status AicpuNodeTask::UpdateIoAddr(TaskContext &context) { + vector io_addrs; + io_addrs.reserve(node_item_->num_inputs + node_item_->num_outputs); + for (auto i = 0; i < node_item_->num_inputs; ++i) { + auto inputData = context.GetInput(i); + GE_CHECK_NOTNULL(inputData); + + GELOGD("Node[%s] input[%d] = %p, size = %zu", node_name_.c_str(), i, 
inputData->GetData(), inputData->GetSize()); + io_addrs.emplace_back(reinterpret_cast(inputData->GetData())); + } + + GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); + for (auto j = 0; j < node_item_->num_outputs; ++j) { + auto outputData = context.GetOutput(j); + GE_CHECK_NOTNULL(outputData); + GELOGD("Node[%s] output[%d] addr = %p, size = %zu", node_name_.c_str(), j, outputData->GetData(), + outputData->GetSize()); + io_addrs.emplace_back(reinterpret_cast(outputData->GetData())); + } + + auto io_addr = args_.get() + sizeof(aicpu::AicpuParamHead); + // if has input and output, need copy to ioaddr + error_t cpy_ret = + memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead), &io_addrs[0], sizeof(uint64_t) * io_addrs.size()); + GE_CHK_BOOL_RET_STATUS(cpy_ret == EOK, INTERNAL_ERROR, + "Node[%s] memcpy io addr to AicpuParamHead failed, ret=%d, args_size=%u, io nums=%zu.", + node_name_.c_str(), cpy_ret, args_size_, io_addrs.size()); + return SUCCESS; +} + +Status AicpuNodeTask::LaunchTask(TaskContext &context) { + GELOGI("Node[%s] launch task start. unknown_type=%d.", node_name_.c_str(), unknown_type_); + const auto &so_name = task_def_.kernel().so_name(); + const auto &kernel_name = task_def_.kernel().kernel_name(); + uint32_t flag = RT_KERNEL_DEFAULT; + auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), + reinterpret_cast(kernel_name.c_str()), + 1, // default core dim is 1 + args_.get(), args_size_, nullptr, context.GetStream(), flag); + GE_CHK_RT_RET(rt_ret); + GELOGI("Node[%s] launch task end.", node_name_.c_str()); + return SUCCESS; +} + +Status AicpuNodeTask::TaskCallback(TaskContext &context) { + GELOGI("Node[%s] task callback start, unknown_type=%d.", node_name_.c_str(), unknown_type_); + Status callback_ret = SUCCESS; + // check need update shape, call update shape. 
+ if (unknown_type_ == DEPEND_SHAPE_RANGE) { + // check result + callback_ret = UpdateOutputShapeFromExtInfo(); + } else { + GELOGI("Node[%s] unknown shape type is %d no need update output shape.", node_name_.c_str(), unknown_type_); + } + GELOGI("Node[%s] task callback end.", node_name_.c_str()); + return callback_ret; +} + Status AiCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const { // malloc HBM memory at Init, here just update them return task.UpdateArgs(context); @@ -643,19 +685,34 @@ Status AiCpuNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) cons Status AiCpuNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const { GE_CHECK_NOTNULL(node); - GELOGI("Node[%s] create task start.", node->GetName().c_str()); + GELOGI("Node[%s] load task start.", node->GetName().c_str()); + auto node_item = model.GetNodeItem(node); + GE_CHECK_NOTNULL(node_item); auto task_defs = model.GetTaskDefs(node); GE_CHECK_NOTNULL(task_defs); - GE_CHK_BOOL_EXEC((*task_defs).size() == 1, return PARAM_INVALID, "aicpu op[%s] task_def num[%zu] != 1", - node->GetName().c_str(), (*task_defs).size()); - auto aicpu_task = MakeShared(node, (*task_defs)[0]); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(aicpu_task == nullptr, return MEMALLOC_FAILED, - "create aicpuTfNodeTask for node %s failed", node->GetName().c_str()); + GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "Node[%s] task_def num[%zu] != 1", + node->GetName().c_str(), (*task_defs).size()); + const auto &task_def = (*task_defs)[0]; + std::shared_ptr aicpu_task; + if (task_def.type() == RT_MODEL_TASK_KERNEL_EX) { + GELOGI("Node[%s] task type=%u is AicpuTfNodeTask.", node->GetName().c_str(), task_def.type()); + aicpu_task = MakeShared(node_item, task_def); + } else if (task_def.type() == RT_MODEL_TASK_KERNEL) { + GELOGI("Node[%s] task type=%u is AicpuNodeTask.", node->GetName().c_str(), task_def.type()); + aicpu_task = MakeShared(node_item, task_def); + } 
else { + GELOGE(UNSUPPORTED, "Node[%s] task type=%u is not supported by aicpu node executor.", node->GetName().c_str(), + task_def.type()); + return UNSUPPORTED; + } + + GE_CHK_BOOL_RET_STATUS(aicpu_task != nullptr, MEMALLOC_FAILED, "Load task for node %s failed.", + node->GetName().c_str()); - GE_CHK_STATUS_RET(aicpu_task->Init(model), "AicpuTfNodeTask %s Init failed.", node->GetName().c_str()); + GE_CHK_STATUS_RET(aicpu_task->Init(model), "Node[%s] task init failed.", node->GetName().c_str()); task = std::move(aicpu_task); - GELOGI("Node[%s] create task end.", node->GetName().c_str()); + GELOGI("Node[%s] load task end.", node->GetName().c_str()); return SUCCESS; } } // namespace hybrid diff --git a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h index 0444c2aa..ce3f9707 100644 --- a/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h +++ b/src/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h @@ -20,32 +20,84 @@ #include "external/graph/types.h" #include "cce/aicpu_engine_struct.h" #include "hybrid/node_executor/node_executor.h" +#include "aicpu_ext_info.h" namespace ge { namespace hybrid { -class AicpuTfNodeTask : public NodeTask { + +class AicpuNodeTaskBase : public NodeTask { public: - AicpuTfNodeTask(const NodePtr &node, const domi::TaskDef &task_def) : node_(node), task_def_(task_def) {} + AicpuNodeTaskBase(const NodeItem *node_item, const domi::TaskDef &task_def) + : node_item_(node_item), + task_def_(task_def), + node_name_(node_item->node_name), + node_type_(node_item->node_type), + unknown_type_(node_item->shape_inference_type), + aicpu_ext_handle_(node_item->node_name, node_item->num_inputs, node_item->num_outputs, + node_item->shape_inference_type) {} - Status Init(const HybridModel &model); + ~AicpuNodeTaskBase() override = default; - ~AicpuTfNodeTask() override = default; + virtual Status Init(const HybridModel &model) = 0; Status UpdateArgs(TaskContext &context) override; + 
Status ExecuteAsync(TaskContext &context, std::function done_callback) override; + protected: + virtual Status InitExtInfo(const std::string &kernel_ext_info); + + virtual Status UpdateExtInfo(); + + virtual Status UpdateOutputShapeFromExtInfo(); + + Status UpdateShapeToOutputDesc(const GeShape &shape_new, int32_t output_index, GeTensorDescPtr &output_desc); + + virtual Status LaunchTask(TaskContext &context) = 0; + + virtual Status TaskCallback(TaskContext &context) = 0; + + virtual Status UpdateIoAddr(TaskContext &context) = 0; + + static Status AllocTensorBuffer(size_t size, std::unique_ptr &tensor_buffer); + + protected: + const NodeItem *node_item_; + // just reference. + const domi::TaskDef &task_def_; + + const std::string node_name_; + + const std::string node_type_; + + UnknowShapeOpType unknown_type_ = DEPEND_IN_SHAPE; + + AicpuExtInfoHandler aicpu_ext_handle_; + + // ext info addr, device mem + std::unique_ptr ext_info_addr_dev_; +}; + +class AicpuTfNodeTask : public AicpuNodeTaskBase { + public: + AicpuTfNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) : AicpuNodeTaskBase(node_item, task_def) {} + + ~AicpuTfNodeTask() override = default; + + Status Init(const HybridModel &model) override; + + protected: + Status LaunchTask(TaskContext &context) override; + + Status TaskCallback(TaskContext &context) override; + + Status UpdateIoAddr(TaskContext &context) override; + private: - Status InitExtInfo(); Status InitForDependComputeTask(); - Status SetShapeToBuf(const GeShape &shape, int64_t buf[], uint32_t buf_size); - void GetShapeFromBuf(const int64_t buf[], uint32_t buf_size, std::vector &dims); - Status UpdateOutputShapeFromExtInfo(); - Status UpdateShapeAndDataByResultSummary(TaskContext &context); - Status UpdateShapeToOutputDesc(const GeShape &shape_new, size_t output_index, GeTensorDescPtr &output_desc); - /// /// read result summary and prepare copy task memory. 
/// @param context task context @@ -58,22 +110,14 @@ class AicpuTfNodeTask : public NodeTask { Status UpdateShapeByHbmBuffer(TaskContext &context, const std::vector> &out_shape_hbm); - // common method - static Status AllocTensorBuffer(size_t size, std::unique_ptr &tensor_buffer); + Status PrepareCopyInputs(const TaskContext &context, const std::vector> &out_shape_hbm, + uint64_t ©_num); + static Status EnsureSessionCreated(uint64_t session_id); - static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, string &task_info); + static Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, std::string &task_info); + static uint64_t GetStepIdAddr(const HybridModel &model); private: - const NodePtr node_; - // just reference. - const domi::TaskDef &task_def_; - - UnknowShapeOpType unknown_type_ = DEPEND_IN_SHAPE; - - size_t input_num_ = 0; - - size_t output_num_ = 0; - // kernel buf, device mem std::unique_ptr kernel_buf_; @@ -82,13 +126,9 @@ class AicpuTfNodeTask : public NodeTask { // input and output addr, device mem std::unique_ptr input_output_addr_; - // ext info addr, device mem - std::unique_ptr ext_info_addr_dev_; - std::unique_ptr ext_info_addr_host_; - uint32_t ext_info_num_ = 0; - // just used for depend DEPEND_COMPUTE op std::unique_ptr copy_task_args_buf_; + std::vector> output_summary_; std::vector output_summary_host_; @@ -100,6 +140,29 @@ class AicpuTfNodeTask : public NodeTask { std::unique_ptr copy_input_dst_dev_; }; +class AicpuNodeTask : public AicpuNodeTaskBase { + public: + AicpuNodeTask(const NodeItem *node_item, const domi::TaskDef &task_def) : AicpuNodeTaskBase(node_item, task_def) {} + + ~AicpuNodeTask() override = default; + + Status Init(const HybridModel &model) override; + + protected: + Status LaunchTask(TaskContext &context) override; + + Status TaskCallback(TaskContext &context) override; + + Status UpdateIoAddr(TaskContext &context) override; + + private: + // host mem + std::unique_ptr args_; + + // args size + 
uint32_t args_size_ = 0; +}; + class AiCpuNodeExecutor : public NodeExecutor { public: Status LoadTask(const HybridModel &model, const NodePtr &node, std::shared_ptr &task) const override; diff --git a/src/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/src/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 97c8cdbe..81960c48 100644 --- a/src/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/src/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -39,6 +39,7 @@ Status KnownNodeTask::ExecuteAsync(TaskContext &context, std::function d GELOGW("[%s] KnownNodeExecutor::ExecuteAsync davinci moel has no taskinfo.", context.GetNodeName()); for (int i = 0; i < context.NumInputs(); ++i) { auto tensor = context.MutableInput(i); + GE_CHECK_NOTNULL(tensor); GE_CHK_STATUS_RET(context.SetOutput(i, *tensor), "[%s] Failed to set output[%d]", context.GetNodeName(), i); } } diff --git a/src/ge/hybrid/node_executor/hostcpu/ge_local_node_executor.cc b/src/ge/hybrid/node_executor/hostcpu/ge_local_node_executor.cc index 2c849b59..c3bc9a41 100644 --- a/src/ge/hybrid/node_executor/hostcpu/ge_local_node_executor.cc +++ b/src/ge/hybrid/node_executor/hostcpu/ge_local_node_executor.cc @@ -74,18 +74,18 @@ Status RefInputTask::RefOneByOne(TaskContext &context) { Status RefInputTask::RefByOrder(const std::vector &ref_order, TaskContext &context) { GELOGI("node %s type %s ref input by order begin.", node_name_.c_str(), node_type_.c_str()); - uint32_t output_num = context.NumOutputs(); - if (ref_order.size() != output_num) { - GELOGE(INTERNAL_ERROR, "node %s type %s has %u outputs but only has %u out ref index.", node_name_.c_str(), + int32_t output_num = context.NumOutputs(); + if (ref_order.size() != static_cast(output_num)) { + GELOGE(INTERNAL_ERROR, "node %s type %s has %d outputs but only has %zu out ref index.", node_name_.c_str(), node_type_.c_str(), output_num, ref_order.size()); return INTERNAL_ERROR; } - for (uint32_t 
out_index = 0; out_index < output_num; ++out_index) { + for (auto out_index = 0; out_index < output_num; ++out_index) { auto ref_input_index = ref_order[out_index]; auto input = context.GetInput(ref_input_index); GE_CHECK_NOTNULL(input); context.SetOutput(out_index, *input); - GELOGD("node %s type %s output[%u] ref input[%u] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, + GELOGD("node %s type %s output[%d] ref input[%u] addr=%p.", node_name_.c_str(), node_type_.c_str(), out_index, ref_input_index, input->GetData()); } GELOGI("node %s type %s ref input by order end.", node_name_.c_str(), node_type_.c_str()); @@ -124,9 +124,9 @@ Status DependInputShapeTask::Execute(TaskContext &context) { GELOGE(compute_ret, "node %s type %s compute failed or not imply.", node_->GetName().c_str(), node_type.c_str()); return compute_ret; } - uint32_t output_num = context.NumOutputs(); - if (output_num != outputs.size()) { - GELOGE(INTERNAL_ERROR, "node %s type %s has %u output, but kernel compute only has %zu output.", + int32_t output_num = context.NumOutputs(); + if (static_cast(output_num) != outputs.size()) { + GELOGE(INTERNAL_ERROR, "node %s type %s has %d output, but kernel compute only has %zu output.", node_->GetName().c_str(), node_type.c_str(), output_num, outputs.size()); return INTERNAL_ERROR; } @@ -135,26 +135,26 @@ Status DependInputShapeTask::Execute(TaskContext &context) { GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs()); // copy data to output - for (uint32_t i = 0; i < output_num; ++i) { + for (auto i = 0; i < output_num; ++i) { GeTensorPtr &tensor = outputs[i]; GE_CHECK_NOTNULL(tensor); auto tensor_data = tensor->GetData(); auto tensor_value = context.MutableOutput(i); GE_CHECK_NOTNULL(tensor_value); if (tensor_data.GetSize() > tensor_value->GetSize()) { - GELOGE(INTERNAL_ERROR, "node:%s type:%s [%zu]th compute data size=%zu, but context data size=%zu.", + GELOGE(INTERNAL_ERROR, "node:%s type:%s [%d]th compute data size=%zu, but context data 
size=%zu.", node_->GetName().c_str(), node_type.c_str(), i, tensor_data.GetSize(), tensor_value->GetSize()); return INTERNAL_ERROR; } - GELOGI("node:%s type:%s [%zu]th output data=%p, out size=%zu, data size=%zu.", node_->GetName().c_str(), + GELOGI("node:%s type:%s [%d]th output data=%p, out size=%zu, data size=%zu.", node_->GetName().c_str(), node_type.c_str(), i, tensor_value->GetData(), tensor_value->GetSize(), tensor_data.GetSize()); if (tensor_data.GetSize() > 0) { GE_CHK_RT_RET(rtMemcpy(tensor_value->MutableData(), tensor_value->GetSize(), tensor_data.GetData(), tensor_data.GetSize(), RT_MEMCPY_HOST_TO_DEVICE)); } - GELOGI("node:%s type:%s [%zu]th set data success, data size=%zu.", node_->GetName().c_str(), node_type.c_str(), i, + GELOGI("node:%s type:%s [%d]th set data success, data size=%zu.", node_->GetName().c_str(), node_type.c_str(), i, tensor_data.GetSize()); } return SUCCESS; diff --git a/src/ge/hybrid/node_executor/node_executor.cc b/src/ge/hybrid/node_executor/node_executor.cc index 38d37aa1..f3b86948 100644 --- a/src/ge/hybrid/node_executor/node_executor.cc +++ b/src/ge/hybrid/node_executor/node_executor.cc @@ -147,4 +147,4 @@ NodeExecutorRegistrar::NodeExecutorRegistrar(NodeExecutorManager::ExecutorType e NodeExecutorManager::GetInstance().RegisterExecutorBuilder(executor_type, builder); } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/hybrid/node_executor/task_context.cc b/src/ge/hybrid/node_executor/task_context.cc index 91bcc402..42c653be 100644 --- a/src/ge/hybrid/node_executor/task_context.cc +++ b/src/ge/hybrid/node_executor/task_context.cc @@ -262,8 +262,8 @@ Status TaskContext::PropagateOutputs() { auto dst_input_idx = dst_input_index_and_node.first; auto dst_node_item = dst_input_index_and_node.second; GELOGI( - "Propagate output of node %s, output index = %d, dst node = %s, dst_input_index = %d, dst_input_offset = %d, " - "addr = %p", + "Propagate output of node %s, output 
index = %d, dst node = %s, " + "dst_input_index = %d, dst_input_offset = %d, addr = %p", node_item_->NodeName().c_str(), i, dst_node_item->NodeName().c_str(), dst_input_idx, dst_node_item->input_start + dst_input_idx, execution_context_->all_inputs.data() + dst_node_item->input_start + dst_input_idx); @@ -290,4 +290,4 @@ void TaskContext::ReleaseInput(int index) { } } } // namespace hybrid -} // namespace ge \ No newline at end of file +} // namespace ge diff --git a/src/ge/init/gelib.cc b/src/ge/init/gelib.cc index 0a1178b1..5fcb0cd7 100644 --- a/src/ge/init/gelib.cc +++ b/src/ge/init/gelib.cc @@ -29,6 +29,7 @@ #include "common/profiling/profiling_manager.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" +#include "framework/common/util.h" #include "ge/ge_api_types.h" #include "ge_local_engine/engine/host_cpu_engine.h" #include "graph/ge_context.h" @@ -44,6 +45,7 @@ using Json = nlohmann::json; namespace ge { namespace { const int kDecimal = 10; +const int kSocVersionLen = 50; } // namespace static std::shared_ptr instancePtr_ = nullptr; @@ -57,15 +59,17 @@ Status GELib::Initialize(const map &options) { GELOGE(GE_CLI_INIT_FAILED, "GeLib initialize failed, malloc shared_ptr failed."); return GE_CLI_INIT_FAILED; } - Status ret = instancePtr_->SetRTSocVersion(options); + + map new_options; + Status ret = instancePtr_->SetRTSocVersion(options, new_options); if (ret != SUCCESS) { GELOGE(ret, "GeLib initial failed."); return ret; } - GetMutableGlobalOptions().insert(options.begin(), options.end()); + GetMutableGlobalOptions().insert(new_options.begin(), new_options.end()); GetThreadLocalContext().SetGlobalOption(GetMutableGlobalOptions()); GE_TIMESTAMP_START(Init); - ret = instancePtr_->InnerInitialize(options); + ret = instancePtr_->InnerInitialize(new_options); if (ret != SUCCESS) { GELOGE(ret, "GeLib initial failed."); instancePtr_ = nullptr; @@ -166,10 +170,11 @@ Status GELib::SystemInitialize(const map &options) { if (iter->second 
== std::to_string(enable_dump_flag) && path_iter != options.end()) { std::string dump_path = path_iter->second; if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { - dump_path += "/"; + dump_path = dump_path + "/" + CurrentTimeInStr() + "/"; } PropertiesManager::Instance().AddDumpPropertyValue(DUMP_ALL_MODEL, {}); + GELOGD("Get dump path %s successfully", dump_path.c_str()); PropertiesManager::Instance().SetDumpOutputPath(dump_path); } auto step_iter = options.find(OPTION_EXEC_DUMP_STEP); @@ -186,8 +191,11 @@ Status GELib::SystemInitialize(const map &options) { } } + // In train and infer, profiling is always needed. + InitOptions(options); + InitProfiling(this->options_); + if (is_train_mode_) { - InitOptions(options); status = InitSystemWithOptions(this->options_); } else { status = InitSystemWithoutOptions(); @@ -195,13 +203,30 @@ Status GELib::SystemInitialize(const map &options) { return status; } -Status GELib::SetRTSocVersion(const map &options) { - GELOGI("start SetRTSocVersion"); - auto it = options.find(ge::SOC_VERSION); - if (it != options.end()) { +void GELib::InitProfiling(Options &options) { + GELOGI("Init Profiling. 
session Id: %ld, device id:%d ", options.session_id, options.device_id); + std::lock_guard lock(status_mutex_); + GetContext().Init(); + // Profiling init + if (ProfilingManager::Instance().Init(options) != SUCCESS) { + GELOGW("Profiling init failed."); + } +} + +Status GELib::SetRTSocVersion(const map &options, map &new_options) { + GELOGI("Start to set SOC_VERSION"); + new_options.insert(options.begin(), options.end()); + auto it = new_options.find(ge::SOC_VERSION); + if (it != new_options.end()) { GE_CHK_RT_RET(rtSetSocVersion(it->second.c_str())); + GELOGI("Succeeded in setting SOC_VERSION[%s] to runtime.", it->second.c_str()); } else { - GELOGW("options not find SOC_VERSION"); + GELOGI("SOC_VERSION is not exist in options"); + char version[kSocVersionLen] = {0}; + rtError_t rt_ret = rtGetSocVersion(version, kSocVersionLen); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtGetSocVersion failed"); return FAILED;) + GELOGI("Succeeded in getting SOC_VERSION[%s] from runtime.", version); + new_options.insert(std::make_pair(ge::SOC_VERSION, version)); } return SUCCESS; } @@ -270,12 +295,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt GE_IF_BOOL_EXEC(is_system_inited && !is_shutdown, GELOGW("System init with options is already inited and not shutdown."); return SUCCESS); - GetContext().Init(); - - // profiling init - if (ProfilingManager::Instance().Init(options) != SUCCESS) { - GELOGW("Profiling init failed."); - } std::vector mem_type; mem_type.push_back(RT_MEMORY_HBM); @@ -331,12 +350,6 @@ Status GELib::SystemShutdownWithOptions(const Options &options) { Status ret = CsaInteract::GetInstance().WriteJobState(JOBSTATE_SUCCEED); GE_LOGE_IF(ret != SUCCESS, "write job state failed, ret:%u", ret); - if (!ProfilingManager::Instance().ProfilingOpTraceOn() && ProfilingManager::Instance().ProfilingOn()) { - ProfilingManager::Instance().StopProfiling(); - } - if (ProfilingManager::Instance().ProfilingOn()) { - 
ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); - } is_system_inited = false; is_shutdown = true; @@ -408,6 +421,9 @@ Status GELib::Finalize() { GELOGI("HostCpuEngine finalization."); HostCpuEngine::GetInstance().Finalize(); + // Shut down profiling + ShutDownProfiling(); + if (is_train_mode_) { GELOGI("System ShutDown."); mid_state = SystemShutdownWithOptions(this->options_); @@ -416,6 +432,7 @@ Status GELib::Finalize() { final_state = mid_state; } } + is_train_mode_ = false; GetMutableGlobalOptions().erase(ENABLE_SINGLE_STREAM); @@ -430,6 +447,17 @@ Status GELib::Finalize() { return SUCCESS; } +void GELib::ShutDownProfiling() { + std::lock_guard lock(status_mutex_); + + if (!ProfilingManager::Instance().ProfilingOpTraceOn() && ProfilingManager::Instance().ProfilingOn()) { + ProfilingManager::Instance().StopProfiling(); + } + if (ProfilingManager::Instance().ProfilingOn()) { + ProfilingManager::Instance().PluginUnInit(GE_PROFILING_MODULE); + } +} + // Get Singleton Instance std::shared_ptr GELib::GetInstance() { return instancePtr_; } diff --git a/src/ge/init/gelib.h b/src/ge/init/gelib.h index 81d36612..0dfec391 100644 --- a/src/ge/init/gelib.h +++ b/src/ge/init/gelib.h @@ -71,6 +71,9 @@ class GELib { // get incre build cache path const std::string &GetIncreBuildCachePath() const { return incre_build_cache_path_; } + void InitProfiling(Options &options); + void ShutDownProfiling(); + Status InitSystemWithoutOptions(); Status InitSystemWithOptions(Options &options); Status SystemShutdownWithOptions(const Options &options); @@ -80,7 +83,7 @@ class GELib { const GELib &operator=(const GELib &); Status InnerInitialize(const map &options); Status SystemInitialize(const map &options); - Status SetRTSocVersion(const map &options); + Status SetRTSocVersion(const map &options, map &new_options); void RollbackInit(); void InitOptions(const map &options); diff --git a/src/ge/ir_build/atc_ir_common.cc b/src/ge/ir_build/atc_ir_common.cc index 
4a5b5bd4..12c85bc0 100644 --- a/src/ge/ir_build/atc_ir_common.cc +++ b/src/ge/ir_build/atc_ir_common.cc @@ -15,11 +15,11 @@ */ #include "atc_ir_common.h" +#include "common/util/error_manager/error_manager.h" +#include "external/ge/ge_api_types.h" #include "framework/common/string_util.h" #include "framework/common/types.h" #include "framework/common/util.h" -#include "common/util/error_manager/error_manager.h" -#include "external/ge/ge_api_types.h" using std::pair; using std::string; @@ -32,8 +32,8 @@ const int64_t kDynamicImageSizeNum = 2; // datatype/formats from user to GE, Unified to util interface file later const std::map kOutputTypeSupportDatatype = { {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; -const std::set kBufferOptimizeSupportOption = {"l1_optimize", "l2_optimize", "off_optimize", - "l1_and_l2_optimize"}; +const std::set kBufferOptimizeSupportOption = {"l2_optimize", "off_optimize"}; +const std::string IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT = "high_performance"; } // namespace bool CheckDynamicBatchSizeInputShapeValid(unordered_map> shape_map, @@ -168,14 +168,14 @@ Status CheckDynamicBatchSizeOrImageSizeParamValid(std::string &dynamic_batch_siz unordered_map> shape_map; vector>> user_shape_map; is_dynamic_input = true; - if (!ParseInputShape(input_shape, shape_map, user_shape_map, is_dynamic_input)) { - GELOGE(ge::PARAM_INVALID, "Failed to parse input shape: %s", input_shape.c_str()); + if (input_shape.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"input_shape"}); + GELOGE(ge::PARAM_INVALID, "The input_shape can not be empty in dynamic batchsize scenario."); return ge::PARAM_INVALID; } - if (shape_map.empty()) { - ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"input_shape"}); - GELOGE(ge::PARAM_INVALID, "The input_shape can not be empty in dynamic batchsize scenario."); + if (!ParseInputShape(input_shape, shape_map, user_shape_map, is_dynamic_input)) { + 
GELOGE(ge::PARAM_INVALID, "Failed to parse input shape: %s", input_shape.c_str()); return ge::PARAM_INVALID; } @@ -250,8 +250,11 @@ bool ParseInputShape(const string &input_shape, unordered_map &options, std::string tips) { + for (auto iter = options.begin(); iter != options.end(); iter++) { + std::string key = iter->first; + std::string option_name = iter->second; + GELOGI("%s set successfully, key=%s, value=%s", tips.c_str(), key.c_str(), option_name.c_str()); + } +} } // namespace ge diff --git a/src/ge/ir_build/atc_ir_common.h b/src/ge/ir_build/atc_ir_common.h index 361af850..b0a2b08b 100644 --- a/src/ge/ir_build/atc_ir_common.h +++ b/src/ge/ir_build/atc_ir_common.h @@ -30,8 +30,10 @@ namespace ge { -static std::set caffe_support_input_format = {"NCHW", "ND", "NCDHW"}; +static std::set caffe_support_input_format = {"NCHW", "ND"}; static std::set tf_support_input_format = {"NCHW", "NHWC", "ND", "NCDHW", "NDHWC"}; +static std::set onnx_support_input_format = {"NCHW", "ND"}; + static std::map input_format_str_to_geformat = { {"ND", domi::DOMI_TENSOR_ND}, {"NCHW", domi::DOMI_TENSOR_NCHW}, {"NHWC", domi::DOMI_TENSOR_NHWC}, {"CHWN", domi::DOMI_TENSOR_CHWN}, {"NC1HWC0", domi::DOMI_TENSOR_NC1HWC0}, {"NHWC1C0", domi::DOMI_TENSOR_NHWC1C0}, @@ -56,5 +58,10 @@ Status CheckOutputTypeParamValid(const std::string output_type); Status CheckBufferOptimizeParamValid(const std::string buffer_optimize); Status CheckCompressWeightParamValid(const std::string enable_compress_weight, const std::string compress_weight_conf); int CheckLogParamValidAndSetLogLevel(const std::string log); +Status CheckInsertOpConfParamValid(const std::string insert_op_conf); +Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory); +Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream); +Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode); +void PrintOptionMap(std::map &options, std::string tips); } 
// namespace ge -#endif // FRAMEWORK_DOMI_ATC_IR_COMMON_H_ \ No newline at end of file +#endif // FRAMEWORK_DOMI_ATC_IR_COMMON_H_ diff --git a/src/ge/ir_build/ge_ir_build.cc b/src/ge/ir_build/ge_ir_build.cc index 74b43215..0be75b51 100644 --- a/src/ge/ir_build/ge_ir_build.cc +++ b/src/ge/ir_build/ge_ir_build.cc @@ -45,10 +45,70 @@ const std::string IR_OPTION_TARGET = "target"; const std::string IR_OPTION_MODE = "mode"; const std::string IR_OP_CONF_DELIMITER = ":"; const std::string IR_OPTION_LOG_LEVEL_DEFAULT = "default"; +const std::string IR_OPTION_BUFFER_OPTIMIZE_DEFAULT = "l2_optimize"; +const std::string IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT = "0"; +const std::string IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT = "false"; } // namespace +static graphStatus CheckGlobalOptions(std::map &global_options) { + // check param disable_reuse_memory + std::string disable_reuse_memory = + global_options.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY) == global_options.end() + ? IR_OPTION_DISABLE_REUSE_MEMORY_DEFAULT + : global_options[ge::ir_option::EXEC_DISABLE_REUSED_MEMORY]; + GE_CHK_BOOL_EXEC(ge::CheckDisableReuseMemoryParamValid(disable_reuse_memory) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check disable_reuse_memory failed!"); + global_options[ge::ir_option::EXEC_DISABLE_REUSED_MEMORY] = disable_reuse_memory; + // check buffer_optimize + std::string buffer_optimize = global_options.find(ge::ir_option::BUFFER_OPTIMIZE) == global_options.end() + ? IR_OPTION_BUFFER_OPTIMIZE_DEFAULT + : global_options[ge::ir_option::BUFFER_OPTIMIZE]; + GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(buffer_optimize) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID, + "check buffer optimize failed!"); + global_options[ge::ir_option::BUFFER_OPTIMIZE] = buffer_optimize; + // check enable_single_stream + std::string enable_single_stream = global_options.find(ge::ir_option::ENABLE_SINGLE_STREAM) == global_options.end() + ? 
"" + : global_options[ge::ir_option::ENABLE_SINGLE_STREAM]; + GE_CHK_BOOL_EXEC(ge::CheckEnableSingleStreamParamValid(enable_single_stream) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check enable single stream failed!"); + // check compress_weight + std::string enable_compress_weight = + global_options.find(ge::ir_option::ENABLE_COMPRESS_WEIGHT) == global_options.end() + ? IR_OPTION_ENABLE_COMPRESS_WEIGHT_DEFAULT + : global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT]; + std::string compress_weight_conf = global_options.find(ge::ir_option::COMPRESS_WEIGHT_CONF) == global_options.end() + ? "" + : global_options[ge::ir_option::COMPRESS_WEIGHT_CONF]; + GE_CHK_BOOL_EXEC(ge::CheckCompressWeightParamValid(enable_compress_weight, compress_weight_conf) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check compress weight failed!"); + global_options[ge::ir_option::ENABLE_COMPRESS_WEIGHT] = + (enable_compress_weight == "true") ? ge::kEnableCompressWeightTrue : ge::kEnableCompressWeightFalse; + // check optypelist_for_implmode and op_select_implmode + std::string optypelist_for_implmode = + global_options.find(ge::ir_option::OPTYPELIST_FOR_IMPLMODE) == global_options.end() + ? "" + : global_options[ge::ir_option::OPTYPELIST_FOR_IMPLMODE]; + std::string op_select_implmode = global_options.find(ge::ir_option::OP_SELECT_IMPL_MODE) == global_options.end() + ? 
"" + : global_options[ge::ir_option::OP_SELECT_IMPL_MODE]; + GE_CHK_BOOL_EXEC(ge::CheckImplmodeParamValid(optypelist_for_implmode, op_select_implmode) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check optypelist_for_implmode and op_select_implmode failed!"); + global_options[ge::ir_option::OP_SELECT_IMPL_MODE] = op_select_implmode; + + return GRAPH_SUCCESS; +} + graphStatus aclgrphBuildInitialize(std::map global_options) { GELOGD("Enter aclgrphInitialize start!"); + // check global options + if (CheckGlobalOptions(global_options) != GRAPH_SUCCESS) { + GELOGE(GRAPH_PARAM_INVALID, "Check global options falied!"); + return GRAPH_PARAM_INVALID; + } + // print global option map + ge::PrintOptionMap(global_options, "global option"); + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGI("aclgrphInitialize start!"); @@ -82,7 +142,7 @@ class Impl { GetContext().out_nodes_map.clear(); GetContext().user_out_nodes.clear(); GetContext().net_format = domi::DOMI_TENSOR_RESERVED; - GetContext().type = domi::FMK_TYPE_RESERVED; + GetContext().type = domi::FRAMEWORK_RESERVED; GetContext().run_mode = ONLY_PRE_CHECK; GetContext().train_flag = false; GetContext().fp16_high_precision = HIGH_PRECISION_DEFAULT; @@ -114,12 +174,7 @@ graphStatus Impl::CheckOptions(const std::map &options GELOGE(GRAPH_PARAM_INVALID, "input options include unsupported option(%s).Please check!", ele.first.c_str()); return GRAPH_PARAM_INVALID; } - - if (ele.first == ge::ir_option::ENABLE_COMPRESS_WEIGHT) { - continue; // this option will be set afer param check. - } else { - options_.insert(ele); - } + options_.insert(ele); } return GRAPH_SUCCESS; } @@ -127,13 +182,14 @@ graphStatus Impl::Init(const std::map &options) { // 1. check options graphStatus ret = CheckOptions(options); if (ret != GRAPH_SUCCESS) { - GELOGE(ret, "user input options is not illegal!Please check!"); + GELOGE(ret, "user input options are illegal! 
Please check!"); return ret; } // set log level std::string log = options_.find(ge::ir_option::LOG_LEVEL) == options_.end() ? IR_OPTION_LOG_LEVEL_DEFAULT : options_[ge::ir_option::LOG_LEVEL]; GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); + options_[ge::ir_option::LOG_LEVEL] = log; string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"]; string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; @@ -151,7 +207,7 @@ graphStatus Impl::Init(const std::map &options) { GELOGE(GRAPH_PARAM_INVALID, "check dynamic batch size or image size failed!"); return GRAPH_PARAM_INVALID; } - GELOGD("user input dynamic_batch_size:%s dynamic_image_size:%s", dynamic_batch_size.c_str(), + GELOGD("user input dynamic_batch_size:%s,dynamic_image_size:%s", dynamic_batch_size.c_str(), dynamic_image_size.c_str()); GetContext().dynamic_batch_size = dynamic_batch_size; GetContext().dynamic_image_size = dynamic_image_size; @@ -160,23 +216,11 @@ graphStatus Impl::Init(const std::map &options) { options_.find(ge::ir_option::OUTPUT_TYPE) == options_.end() ? "" : options_[ge::ir_option::OUTPUT_TYPE]; GE_CHK_BOOL_EXEC(ge::CheckOutputTypeParamValid(output_type) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID, "check output type failed!"); - // check buffer_optimize - std::string buffer_optimize = - options_.find(ge::ir_option::BUFFER_OPTIMIZE) == options_.end() ? "" : options_[ge::ir_option::BUFFER_OPTIMIZE]; - GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(buffer_optimize) == ge::SUCCESS, return ge::GRAPH_PARAM_INVALID, - "check buffer optimize failed!"); - // check compress_weight - std::string enable_compress_weight = options_.find(ge::ir_option::ENABLE_COMPRESS_WEIGHT) == options_.end() - ? "" - : options_[ge::ir_option::ENABLE_COMPRESS_WEIGHT]; - std::string compress_weight_conf = options_.find(ge::ir_option::COMPRESS_WEIGHT_CONF) == options_.end() - ? 
"" - : options_[ge::ir_option::COMPRESS_WEIGHT_CONF]; - GE_CHK_BOOL_EXEC(ge::CheckCompressWeightParamValid(enable_compress_weight, compress_weight_conf) == ge::SUCCESS, - return ge::FAILED, "check compress weight failed!"); - options_.insert(std::pair( - std::string(ge::ir_option::ENABLE_COMPRESS_WEIGHT), - (enable_compress_weight == "true") ? ge::kEnableCompressWeightTrue : ge::kEnableCompressWeightFalse)); + // check insert_op_conf + std::string insert_op_conf = + options_.find(ge::ir_option::INSERT_OP_FILE) == options_.end() ? "" : options_[ge::ir_option::INSERT_OP_FILE]; + GE_CHK_BOOL_EXEC(ge::CheckInsertOpConfParamValid(std::string(insert_op_conf)) == ge::SUCCESS, + return ge::GRAPH_PARAM_INVALID, "check insert op conf failed!"); // for IR builder.Only support om mode, so here fixed; options_.insert(std::pair(string(IR_OPTION_MODE), to_string(0))); @@ -184,6 +228,8 @@ graphStatus Impl::Init(const std::map &options) { options_.insert(std::pair(string(ge::RUN_FLAG), to_string(0))); options_.insert(std::pair(string(ge::TRAIN_FLAG), to_string(0))); options_.insert(std::pair(string(ge::SAVE_ORIGINAL_MODEL), to_string(0))); + // print ge option map + ge::PrintOptionMap(options_, "ge option"); // 3. init generator with options_ ret = generator_.Initialize(options_); diff --git a/src/ge/module.mk b/src/ge/module.mk new file mode 100644 index 00000000..a3c14710 --- /dev/null +++ b/src/ge/module.mk @@ -0,0 +1,4 @@ +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/ge_inference.mk +include $(LOCAL_PATH)/ge_runner.mk diff --git a/src/ge/offline/main.cc b/src/ge/offline/main.cc new file mode 100644 index 00000000..27309c1a --- /dev/null +++ b/src/ge/offline/main.cc @@ -0,0 +1,1195 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common/gflags_util.h" +#include "common/util.h" +#include "common/util/error_manager/error_manager.h" +#include "framework/common/debug/ge_log.h" +#include "ge/ge_api.h" +#include "generator/ge_generator.h" +#include "graph/anchor.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/graph.h" +#include "graph/op_desc.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/type_utils.h" +#include "init/gelib.h" +#include "ir_build/atc_ir_common.h" +#include "omg/omg.h" +#include "omg/parser/parser_factory.h" +#include "parser/common/register_tbe.h" +#include "register/op_registry.h" +#include "single_op_parser.h" + +using domi::BuildMode; +using domi::GetContext; +using domi::OpRegistrationData; +using domi::OpRegistry; +using domi::Status; +using domi::SUCCESS; +using ge::GEN_OM_MODEL; +using ge::GflagsUtils; +using ge::MODEL_TO_JSON; +using ge::ONLY_PRE_CHECK; +using ge::ParseInputShape; +using ge::PBTXT_TO_JSON; +using std::map; +using std::pair; +using std::shared_ptr; +using std::string; +using std::vector; + +static bool is_dynamic_input = false; + +// 310 limited 8G size +const char *const kGraphMemoryManagerMallocMaxSize = "8*1024*1024*1024"; + +DEFINE_string(model, "", "The model file."); +DEFINE_string(output, "", "The output file path&name."); +DEFINE_int32(framework, -1, "Framework type(0:Caffe; 1:MindSpore; 3:Tensorflow)."); +DEFINE_string(weight, "", "Optional; weight file. 
Required when framework is Caffe."); + +DEFINE_string(input_shape, "", + "Optional; shape of input data. Required when framework is caffe " + "or TensorFLow or MindSpore." + "Format: \"input_name1:n1,c1,h1,w1;input_name2:n2,c2,h2,w2\""); +DEFINE_bool(h, false, "show this help message"); +DEFINE_string(cal_conf, "", "Optional; the calibration config file."); + +DEFINE_string(insert_op_conf, "", "Optional; the config file to insert new op, for example AIPP op."); +DEFINE_string(op_name_map, "", "Optional; custom op name mapping file."); + +DEFINE_string(target, "", "Optional; mini."); + +DEFINE_string(om, "", "The model file to be converted to json."); +DEFINE_string(json, "", "The output json file path&name which is converted from a model."); +DEFINE_int32(mode, 0, + "Optional; run mode, 0(default): model => framework model; 1: " + "framework model => json; 3: only pre-check; 5: pbtxt => json."); + +#if !defined(__ANDROID__) && !defined(ANDROID) +DEFINE_int32(encrypt_mode, -1, "Optional; the encrypt flag. 0: encrypt; -1(default): not encrypt"); +DEFINE_string(encrypt_key, "", "Optional; the encrypt_key file."); +DEFINE_string(certificate, "", "Optional; the certificate file."); +DEFINE_string(hardware_key, "", "Optional; the ISV key file."); +DEFINE_string(private_key, "", "Optional; the private key file."); +#endif + +DEFINE_string(out_nodes, "", + "Optional; output nodes designated by users." + "Format: \"node_name1:0;node_name1:1;node_name2:0\""); + +DEFINE_string(precision_mode, "", + "Optional; precision mode." + "Support force_fp16, allow_mix_precision, allow_fp32_to_fp16, must_keep_origin_dtype."); + +DEFINE_string(input_format, "", + "Optional; input_format, format of input data, NCHW;NHWC." + "Format:\"NHWC\""); + +DEFINE_string(check_report, "check_result.json", "Optional; the pre-checking report file."); + +DEFINE_string(input_fp16_nodes, "", + "Optional; input node datatype is fp16 and format is NC1HWC0." 
+ "Format:\"node_name1;node_name2\""); + +DEFINE_string(is_output_adjust_hw_layout, "", + "Optional; Net output node's datatype is fp16 and format is " + "NC1HWC0, or not." + "Format:\"false,true,false,true\""); + +DEFINE_string(is_input_adjust_hw_layout, "", + "Optional; Intput node's datatype is fp16 and format is " + "NC1HWC0, or not." + "Format:\"false,true,false,true\""); + +DEFINE_string(output_type, "", + "Optional; output type! " + "Support FP32,FP16,INT8,INT16,UINT16,UINT8,INT32,INT64,UINT32,UINT64,DOUBLE."); + +DEFINE_string(op_select_implmode, "", + "Optional; op select implmode! " + "Support high_precision, high_performance."); + +DEFINE_string(optypelist_for_implmode, "", + "Optional; Nodes need use implmode selected in op_select_implmode " + "Format:\"node_name1,node_name2\""); + +DEFINE_string(head_stream, "0", + "Optional; Is need head stream, default is not need." + "Format: \"0: no head stream; 1: add head stream;\""); + +DEFINE_string(singleop, "", "Optional; If set, generate single op model with the given json file."); + +DEFINE_int32(disable_reuse_memory, 0, "Optional; If set to 1, disable reuse memory when generating if."); + +DEFINE_string(auto_tune_mode, "", "Optional; Set tune mode."); + +DEFINE_string(soc_version, "", "The soc version."); + +DEFINE_string(core_type, "AiCore", "Optional; If set to VectorCore, only use vector core."); + +DEFINE_string(aicore_num, "", "Optional; Set aicore num"); + +DEFINE_string(buffer_optimize, "l2_optimize", "Optional; buffer optimize"); + +DEFINE_string(fusion_switch_file, "", "Optional; Set fusion switch file path"); + +DEFINE_string(save_original_model, "", "Optional; enable output original offline model. false(default)"); + +DEFINE_string(dynamic_batch_size, "", + "Optional; If set, generate dynamic multi batch model. " + "Different batch sizes are split by ','." 
+ "dynamic_batch_size and dynamic_imagesize can only be set one."); + +DEFINE_string(dynamic_image_size, "", + "Optional; If set, generate dynamic multi image size model." + "Different groups of image size are split by ';'," + "while different dimensions of each group are split by ','." + "dynamic_batch_size and dynamic_imagesize can only be set one."); + +DEFINE_string(enable_small_channel, "0", "Optional; If set to 1, small channel is enabled."); + +DEFINE_bool(enable_compress_weight, false, "Optional; enable compress weight. true: enable; false(default): disable"); + +DEFINE_string(compress_weight_conf, "", "Optional; the config file to compress weight"); + +DEFINE_string(enable_single_stream, "", "Optional; enable single stream. true: enable; false(default): disable"); + +DEFINE_string(quant_optimize, "true", "Optional; enable quant optimize. true: enable; false(default): disable"); + +DEFINE_string(log, "default", "Optional; generate atc log. Support debug, info, warning, error, null"); + +DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0."); + +class GFlagUtils { + public: + /** + * @name InitGFlag + * @brief initialize gflag + * @return void + */ + static void InitGFlag(int argc, char *argv[]) { + // -help + gflags::SetUsageMessage( + "usage: ./atc \n" + "generate offline model example:\n" + "./atc --model=./alexnet.prototxt --weight=./alexnet.caffemodel \n" + "--framework=0 --output=./domi \n" + "generate offline model for single op example:\n" + "./atc --singleop=./op_list.json --output=./op_model \n" + "arguments explain:\n" + " --model Model file\n" + " --singleop Single op definition file. atc will generate offline " + "model(s) for single op if --singleop is set. 
\n" + " Note: Only output, soc_verion, core_type, aicore_num, auto_tune_mode, precision_mode, " + "op_select_implmode, enable_small_channel, enable_compress_weight, compress_weight_conf " + "enable_single_stream and log are valid in this mode \n" + " --weight Weight file. Required when framework is Caffe\n" + " --framework Framework type(0:Caffe; 1:MindSpore; 3:Tensorflow)\n" + " --output Output file path&name(needn't suffix, will add " + ".om automatically). \n" + " If --singleop is set, this arg specifies the directory to " + "which the single op offline model will be generated\n" + " --input_shape Shape of input data. Separate multiple nodes with semicolons (;)." + "Use double quotation marks (\") to enclose each argument." + "E.g.: \"input_name1:n1,c1,h1,w1;input_name2:n2,c2,h2,w2\"\n" + " --h/help Show this help message\n" + " --log Generate atc log. Support debug, info, warning, error, null\n" + " --insert_op_conf Config file to insert new op\n" + " --op_name_map Custom op name mapping file\n" + " Note: A semicolon(;) cannot be included in each " + "path, otherwise the resolved path will not match the expected one.\n" + " --precision_mode precision mode, support force_fp16, allow_mix_precision, " + "allow_fp32_to_fp16, must_keep_origin_dtype.\n" + " --om The model file to be converted to json\n" + " --json The output json file path&name which is " + "converted from a model\n" + " --mode Run mode. 0(default): model => framework model 1: " + "framework model => json; 3: only pre-check; 5: pbtxt => json\n" + " --dump_mode The switch of dump json with shape, to be used with mode 1.Default value is : 0." + "0 means disable, 1 means enable .\n" + " --input_format Format of input data. E.g.: \"NCHW\"\n" + " --check_report The pre-checking report file. Default value is: " + "\"check_result.json\"\n" + " --disable_reuse_memory The switch of reuse memory. Default value is : 0." 
+ "0 means reuse memory, 1 means do not reuse memory.\n" + " --input_fp16_nodes Input node datatype is fp16 and format is NCHW. Separate multiple nodes with semicolons " + "(;)." + "Use double quotation marks (\") to enclose each argument." + "E.g.: \"node_name1;node_name2\"\n" + " --is_input_adjust_hw_layout Intput node datatype is fp16 and format is " + "NC1HWC0, used with input_fp16_nodes E.g.: \"true,true,false,true\"\n" + " --out_nodes Output nodes designated by users. Separate multiple nodes with semicolons (;)." + "Use double quotation marks (\") to enclose each argument." + "E.g.: \"node_name1:0;node_name1:1;node_name2:0\"\n" + " --is_output_adjust_hw_layout Net output node datatype is fp16 and format is " + "NC1HWC0, used with out_nodes. E.g.: \"true,true,false,true\"\n" + " --output_type Set net output type. Support FP32, FP16, UINT8." + "E.g.: FP16, means all out nodes set datatype FP16." + "\"node_name1:0:FP16;node_name2:1:FP32\", means multiple out nodes set corresponding datatype.\n" + " --op_select_implmode Set op select implmode. Support high_precision, high_performance." + "default: high_performance\n" + "disable\n" + " --head_stream Add head stream. 0(default): disable; 1: enable\n" + " --soc_version The soc version. E.g.: \"Ascend310\"\n" + " --core_type Set core type AiCore or VectorCore. VectorCore: use vector core. " + "Default value is: AiCore\n" + " --enable_compress_weight Enable compress weight. true: enable; false(default): disable\n" + " --compress_weight_conf Config file to compress weight\n" + " --aicore_num Set aicore num\n" + " --buffer_optimize Set buffer optimize. default enabled, set \"off_optimize\" to close \n" + " --enable_small_channel Set enable small channel. 0(default): disable; 1: enable\n" + " --fusion_switch_file Set fusion switch file path\n" + " --save_original_model Control whether to output original model. " + "E.g.: true: output original model\"\n" + " --dynamic_batch_size Set dynamic batch size. 
E.g: \"batchsize1,batchsize2,batchsize3\"\n" + " --dynamic_image_size Set dynamic image size. Separate multiple nodes with semicolons (;)." + "Use double quotation marks (\") to enclose each argument." + "E.g: \"imagesize1_height,imagesize1_width;imagesize2_height,imagesize2_width\"\n" + " --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n" + " --enable_single_stream Enable single stream. true: enable; false(default): disable\n" + " --quant_optimize Enable quant optimize. true(default): enable; false: disable\n"); + + gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); + // Using gflags to analyze input parameters + GflagsUtils::ChangeHelpFlags(FLAGS_h); + gflags::HandleCommandLineHelpFlags(); + } + + static Status CheckDumpInfershapeJsonFlags() { + Status ret = CheckFrameWorkValid(FLAGS_framework, FLAGS_weight); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_weight != "" && !ge::CheckInputPathValid(FLAGS_weight, "weight"), + return domi::FAILED, "Input parameter[--weight]'s value[%s] is invalid!", + FLAGS_weight.c_str()); + return domi::SUCCESS; + } + + static Status CheckFlags() { + // No model file information passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_model == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"model"}); + return domi::PARAM_INVALID, "Input parameter[--model]'s value is empty!"); + // check param disable_reuse_memory + GE_CHK_BOOL_EXEC(ge::CheckDisableReuseMemoryParamValid(to_string(FLAGS_disable_reuse_memory)) == ge::SUCCESS, + return ge::FAILED, "check disable_reuse_memory failed!"); + + // check optypelist_for_implmode and op_select_implmode + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + ge::CheckImplmodeParamValid(FLAGS_optypelist_for_implmode, FLAGS_op_select_implmode) != ge::SUCCESS, + return ge::FAILED, "check optypelist_for_implmode and op_select_implmode failed!"); + // No 
output file information passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_mode == GEN_OM_MODEL && FLAGS_output == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); + return domi::PARAM_INVALID, "Input parameter[--output]'s value is empty!"); + + Status ret = CheckFrameWorkValid(FLAGS_framework, FLAGS_weight); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "CheckFrameWorkValid failed"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ge::CheckDynamicBatchSizeOrImageSizeParamValid( + FLAGS_dynamic_batch_size, FLAGS_dynamic_image_size, FLAGS_input_shape, + FLAGS_input_format, is_dynamic_input) != ge::SUCCESS, + return ge::FAILED, "check dynamic batch size or image size failed!"); + +#if !defined(__ANDROID__) && !defined(ANDROID) + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!CheckEncryptModeValid(FLAGS_encrypt_mode), return domi::FAILED, + "encrypt_mode %d not valid!!", FLAGS_encrypt_mode); + + if (FLAGS_encrypt_mode == 0) { // Encryption mode + GELOGI("domi will run with encrypt!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_encrypt_key), return domi::FAILED, + "encrypt_key file %s not found!!", FLAGS_encrypt_key.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_certificate), return domi::FAILED, + "certificate file %s not found!!", FLAGS_certificate.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_hardware_key), return domi::FAILED, + "hardware_key file %s not found!!", FLAGS_hardware_key.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_private_key), return domi::FAILED, + "private_key file %s not found!!", FLAGS_private_key.c_str()); + } else { // No encryption + GELOGI("domi will run without encrypt!"); + } +#endif + + /** + * Check the validity of the I / O file path + */ + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_model, "model"), return domi::FAILED, + "model file %s not found!!", FLAGS_model.c_str()); + + 
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_weight != "" && !ge::CheckInputPathValid(FLAGS_weight, "weight"), + return domi::FAILED, "weight file %s not found!!", FLAGS_weight.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_cal_conf != "" && !ge::CheckInputPathValid(FLAGS_cal_conf, "cal_conf"), + return domi::FAILED, "calibration config file %s not found!!", + FLAGS_cal_conf.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_op_name_map != "" && !ge::CheckInputPathValid(FLAGS_op_name_map, "op_name_map"), return domi::FAILED, + "op config file %s not found!!", FLAGS_op_name_map.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_head_stream != "" && FLAGS_head_stream != "0" && FLAGS_head_stream != "1", + ErrorManager::GetInstance().ATCReportErrMessage("E10006", {"parameter"}, {"head_stream"}); + return domi::FAILED, "Input parameter[--head_stream] must be 0 or 1!!"); + + GE_CHK_BOOL_EXEC(ge::CheckInsertOpConfParamValid(std::string(FLAGS_insert_op_conf)) == ge::SUCCESS, + return ge::FAILED, "check insert op conf failed!"); + + GE_CHK_BOOL_EXEC( + ge::CheckCompressWeightParamValid(FLAGS_enable_compress_weight ? 
std::string("true") : std::string("false"), + FLAGS_compress_weight_conf) == ge::SUCCESS, + return ge::FAILED, "check compress weight failed!"); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckOutputPathValid(FLAGS_check_report, "check_report"), return domi::FAILED, + "check_report file %s not found!!", FLAGS_check_report.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_mode == GEN_OM_MODEL && (!ge::CheckOutputPathValid(FLAGS_output) || !CheckPathWithName(FLAGS_output)), + return domi::FAILED, "output path %s is not valid!!", FLAGS_output.c_str()); + + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + FLAGS_save_original_model != "" && FLAGS_save_original_model != "true" && FLAGS_save_original_model != "false", + ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"parameter", "value"}, + {"save_original_model", FLAGS_save_original_model}); + return domi::FAILED, "Input parameter[--save_original_model]'s value[%s] must be true or false.", + FLAGS_save_original_model.c_str()); + GE_CHK_BOOL_EXEC(ge::CheckBufferOptimizeParamValid(FLAGS_buffer_optimize) == ge::SUCCESS, return ge::FAILED, + "check output type failed!"); + + GE_CHK_BOOL_EXEC(ge::CheckEnableSingleStreamParamValid(std::string(FLAGS_enable_single_stream)) == ge::SUCCESS, + return ge::FAILED, "check enable single stream failed!"); + + return domi::SUCCESS; + } + + /** + * Verifying the parameters of converting model to JSON + * 1. Fmk_model + * 2. 
out_json + **/ + static Status CheckConverJsonParamFlags() { + // No model path passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_om == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"om"}); + return domi::PARAM_INVALID, "Input parameter[--om]'s value is empty!!"); + + // JSON path not passed in + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(FLAGS_json == "", + ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"json"}); + return domi::PARAM_INVALID, "Input parameter[--json]'s value is empty!!"); + // Check if the model path is valid + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckInputPathValid(FLAGS_om, "om"), return domi::PARAM_INVALID, + "model file path is invalid: %s.", FLAGS_om.c_str()); + // Check whether the JSON path is valid + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(!ge::CheckOutputPathValid(FLAGS_json, "om"), return domi::PARAM_INVALID, + "json file path is invalid: %s.", FLAGS_json.c_str()); + + return domi::SUCCESS; + } + + /** + * Check command line parameters for explicit settings + * true: Explicit setup + * false: Not set up + * */ + static bool CheckFlagSet(string flag) { + gflags::CommandLineFlagInfo info; + return !(gflags::GetCommandLineFlagInfo(flag.c_str(), &info) && info.is_default); + } + + private: + static bool CheckEncryptModeValid(const int encrypt_mode) { +#if !defined(__ANDROID__) && !defined(ANDROID) + if (encrypt_mode != 0 && encrypt_mode != -1) { + DOMI_LOGE("encrypt mode must be 0 or -1"); + return false; + } +#else + if (encrypt_mode != -1) { + DOMI_LOGE("encrypt mode must be -1"); + return false; + } +#endif + + return true; + } + + static Status CheckFrameWorkValid(int framework, const std::string weight_file) { + if (framework != (int32_t)domi::CAFFE && framework != (int32_t)domi::TENSORFLOW && + framework != (int32_t)domi::MINDSPORE && framework != (int32_t)domi::ONNX) { + // No framework information was passed in or the entered framework is illegal + 
ErrorManager::GetInstance().ATCReportErrMessage("E10007", {"parameter"}, {"framework"}); + DOMI_LOGE( + "Input parameter[--framework] is mandatory and it's value must be: " + "0(Caffe) or 1(MindSpore) or 3(TensorFlow)."); + return domi::PARAM_INVALID; + } + + if ((framework == (int32_t)domi::CAFFE) && (weight_file == "")) { + ErrorManager::GetInstance().ATCReportErrMessage("E10008", {"parameter"}, {"weight"}); + DOMI_LOGE("Input parameter[--weight]'s value is empty when framework is 0(CAFFE)!"); + return domi::PARAM_INVALID; + } + + if ((framework == (int32_t)domi::TENSORFLOW) && (weight_file != "")) { + GELOGW("Parameter weight is ignored for TensorFlow."); + } + + if ((framework == (int32_t)domi::ONNX) && (weight_file != "")) { + GELOGW("Parameter weight is ignored for Onnx."); + } + return domi::SUCCESS; + } + + static bool CheckPathWithName(const std::string &fileName) { + // Determine file path length + if (fileName.size() > static_cast(PATH_MAX)) { + ErrorManager::GetInstance().ATCReportErrMessage("E10021", {"parameter", "size"}, + {"output", std::to_string(PATH_MAX)}); + GELOGE(ge::FAILED, "Input parameter[--output]'s path is too long, it must be less than %d", PATH_MAX); + return false; + } + + // Find the last separator + int slashPosition = fileName.size() - 1; + for (; slashPosition >= 0; slashPosition--) { + if (fileName[slashPosition] == '\\' || fileName[slashPosition] == '/') { + break; + } + } + + // Failure if no filename follows the path + if (slashPosition == static_cast(fileName.size() - 1)) { + ErrorManager::GetInstance().ATCReportErrMessage("E10022", {"parameter", "filename"}, {"output", fileName}); + DOMI_LOGE("Input parameter[--output]'s path[%s] not include file name", fileName.c_str()); + return false; + } + + return true; + } +}; + +void SetDynamicBatchSizeOrImagesizeOptions() { + if (!FLAGS_dynamic_batch_size.empty()) { + domi::GetContext().dynamic_batch_size = FLAGS_dynamic_batch_size; + } + if (!FLAGS_dynamic_image_size.empty()) { + 
domi::GetContext().dynamic_image_size = FLAGS_dynamic_image_size; + } +} + +static bool CheckInputFormat() { + if (FLAGS_input_format.empty()) { + // Set default format + if (FLAGS_framework == static_cast(domi::TENSORFLOW)) { + FLAGS_input_format = "NHWC"; + } else { + FLAGS_input_format = "NCHW"; + } + return true; + } else if ((FLAGS_framework == static_cast(domi::CAFFE))) { // caffe + if (ge::caffe_support_input_format.find(FLAGS_input_format) != ge::caffe_support_input_format.end()) { + return true; + } + ErrorManager::GetInstance().ATCReportErrMessage("E10031", {"value"}, {FLAGS_input_format}); + // only support NCHW ND + GELOGE(ge::FAILED, + "Input parameter[--input_format]'s value[%s] is wrong, " + "only support NCHW, ND in Caffe model.", + FLAGS_input_format.c_str()); + return false; + } else if ((FLAGS_framework == static_cast(domi::TENSORFLOW))) { // tf + if (ge::tf_support_input_format.find(FLAGS_input_format) != ge::tf_support_input_format.end()) { + return true; + } + ErrorManager::GetInstance().ATCReportErrMessage("E10032", {"value"}, {FLAGS_input_format}); + // only support NCHW NHWC ND NCDHW NDHWC + GELOGE(ge::FAILED, + "Input parameter[--input_format]'s value[%s] is wrong, " + "only support NCHW, NHWC, ND, NCDHW, NDHWC in tf model", + FLAGS_input_format.c_str()); + return false; + } else if (FLAGS_framework == static_cast(domi::ONNX)) { + if (ge::onnx_support_input_format.find(FLAGS_input_format) != ge::onnx_support_input_format.end()) { + return true; + } + // only support NCHW ND + GELOGE(ge::FAILED, "Input parameter[--input_format]'s value[%s] is error, Only support NCHW, ND in onnx model", + FLAGS_input_format.c_str()); + return false; + } + return true; +} + +#if !defined(__ANDROID__) && !defined(ANDROID) +static void GetCustomOpPath(std::string &customop_path) { + GELOGI("Enter get custom op path schedule"); + std::string fmk_type = ge::TypeUtils::FmkTypeToSerialString(static_cast(FLAGS_framework)); + GELOGI("Framework type is %s.", 
fmk_type.c_str()); + + const char *path_env = std::getenv("ASCEND_OPP_PATH"); + if (path_env != nullptr) { + std::string path = path_env; + customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type); + GELOGI("Get custom so path from env : %s", path_env); + return; + } + std::string path_base = ge::GELib::GetPath(); + GELOGI("path_base is %s", path_base.c_str()); + path_base = path_base.substr(0, path_base.rfind('/')); + path_base = path_base.substr(0, path_base.rfind('/') + 1); + customop_path = (path_base + "ops/framework/custom" + "/:") + (path_base + "ops/framework/built-in/" + fmk_type); + return; +} + +void GetPluginSoFileList(const string &path, vector &fileList, string &caffe_parser_path) { + // Support to split multiple so directories by ":" + GELOGI("path is %s", path.c_str()); + vector v_path = ge::StringUtils::Split(path, ':'); + for (size_t i = 0; i < v_path.size(); ++i) { + ge::FindParserSo(v_path[i], fileList, caffe_parser_path); + GELOGI("CustomOpLib full name = %s", v_path[i].c_str()); + } +} + +void LoadModelParserLib(std::string caffe_parser_path) { + if (FLAGS_framework == static_cast(domi::TENSORFLOW)) { + void *tf_handle = dlopen("libfmk_tensorflow_parser.so", RTLD_NOW | RTLD_GLOBAL); + if (tf_handle == nullptr) { + GELOGW("dlopen fmk library [libfmk_tensorflow_parser.so] failed."); + return; + } + GELOGI("plugin load libfmk_tensorflow_parser.so success."); + } else if (FLAGS_framework == static_cast(domi::CAFFE)) { + // What we are dealing with here is that the user modifies the caffe.proto scenario. + // If no lib_Caffe_Parser.so is found under the plugin path, use the default lib_Caffe_Parser.so path. + caffe_parser_path = caffe_parser_path.empty() ? "lib_caffe_parser.so" : caffe_parser_path; + + void *handle = dlopen(caffe_parser_path.c_str(), RTLD_NOW | RTLD_GLOBAL); + if (handle == nullptr) { + GELOGW("dlopen failed, plugin name:%s. 
Message(%s).", caffe_parser_path.c_str(), dlerror()); + return; + } + GELOGI("plugin load %s success.", caffe_parser_path.c_str()); + // According to the dependency, the Caffe parsing module of the framework is loaded here( libfmk_caffe_parser.so). + // (depend on the lib_caffe_parser.so) + void *fmk_handle = dlopen("libfmk_caffe_parser.so", RTLD_NOW | RTLD_GLOBAL); + if (fmk_handle == nullptr) { + GELOGW("dlopen fmk library [libfmk_caffe_parser.so] failed."); + if (dlclose(handle) != 0) { + GELOGW("dlclose lib_caffe_parser.so failed."); + } + return; + } + GELOGI("plugin load libfmk_caffe_parser.so success."); + } else if (FLAGS_framework == static_cast(domi::ONNX)) { + void *handle = dlopen("libfmk_onnx_parser.so", RTLD_NOW | RTLD_GLOBAL); + if (handle == nullptr) { + GELOGW("dlopen fmk library [libfmk_onnx_parser.so] failed."); + return; + } + GELOGI("plugin load libfmk_onnx_parser.so success."); + } else { + GELOGW("Framework:%s is not support.", + ge::TypeUtils::FmkTypeToSerialString(static_cast(FLAGS_framework)).c_str()); + return; + } + return; +} + +void LoadCustomOpLib() { + OpRegistry::Instance()->registrationDatas.clear(); + std::string plugin_path; + GetCustomOpPath(plugin_path); + + vector fileList; + string caffe_parser_path = ""; + + // whether there are files in the plugin so path + GetPluginSoFileList(plugin_path, fileList, caffe_parser_path); + + // no file + if (fileList.empty() && caffe_parser_path.empty()) { + GELOGW("can not find any plugin file in plugin_path: %s", plugin_path.c_str()); + } + + LoadModelParserLib(caffe_parser_path); + + // load other so files except lib_caffe_parser.so in the plugin so path + for (auto elem : fileList) { + ge::StringUtils::Trim(elem); + + void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL); + if (handle == nullptr) { + GELOGW("dlopen failed, plugin name:%s. 
Message(%s).", elem.c_str(), dlerror()); + } else { + GELOGI("plugin load %s success.", elem.c_str()); + } + } + + std::vector registrationDatas = OpRegistry::Instance()->registrationDatas; + for (OpRegistrationData reg_data : registrationDatas) { + bool ret = ge::OpRegistrationTbe::Instance()->Finalize(reg_data); + if (ret) { + OpRegistry::Instance()->Register(reg_data); + } + } +} + +#endif + +Status CreateInputsForInference(const ge::Graph &graph, vector &inputs) { + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (ge::NodePtr &input_node : compute_graph->GetAllNodes()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == ge::DATA) { + GELOGI("Data op inputDesc size is: %zu", op->GetAllInputsDesc().size()); + ge::GeTensorDesc tensor = op->GetInputDesc(0); + string data_op_name = op->GetName(); + GELOGI("Data op name is: %s", data_op_name.c_str()); + ge::GeShape data_shape; + auto iter = GetContext().input_dims.find(data_op_name); + if (iter != GetContext().input_dims.end()) { + data_shape = ge::GeShape(iter->second); + GELOGI("Data op get shape from Context."); + } else { + data_shape = tensor.GetShape(); + GELOGI("Data op get shape from InputDesc in geir graph."); + } + + ge::DataType data_type = tensor.GetDataType(); + string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); + GELOGI("Data op get data type:%s from InputDesc in geir graph.", data_type_str.c_str()); + + ge::GeTensor input_tensor; + ge::GeTensorDesc desc(data_shape, ge::Format(GetContext().format), data_type); + input_tensor.SetTensorDesc(desc); + inputs.push_back(input_tensor); + } + } + GELOGI("Build ME model, inputs size is: %zu", inputs.size()); + return ge::SUCCESS; +} + +void ChangeStringToBool(std::string &arg_s, bool arg_b) { + if (arg_s == "true") { + arg_b = true; + } else { + arg_b = false; + } + return; +} + +domi::Status 
GenerateInfershapeJson() { + if (!CheckInputFormat()) { + GELOGE(ge::FAILED, "Check input_format failed"); + return domi::FAILED; + } + Status ret = GFlagUtils::CheckDumpInfershapeJsonFlags(); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "Check flags failed!"); + + // Load custom operator Library + LoadCustomOpLib(); + ge::GeGenerator ge_generator; + std::map options; + ge::Status geRet = ge_generator.Initialize(options); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("GeGenerator initialize failed!"); + return domi::FAILED; + } + + ge::Graph graph; + std::map atc_params; + atc_params.insert(std::pair("input_format", FLAGS_input_format)); + ret = ParseGraph(graph, atc_params, FLAGS_om.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework, "", + FLAGS_target.c_str(), (ge::RunMode)FLAGS_mode, false); + if (ret != ge::SUCCESS) { + DOMI_LOGE("ATC Parse graph domi::FAILED"); + (void)ge_generator.Finalize(); + return domi::FAILED; + } + + geRet = ge_generator.GenerateInfershapeGraph(graph); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("ATC GenerateInfershapeJson failed"); + (void)ge_generator.Finalize(); + return domi::FAILED; + } + if (DumpInfershapeJson(graph, FLAGS_json.c_str()) != SUCCESS) { + DOMI_LOGE("ATC DumpInfershapeJson failed"); + (void)ge_generator.Finalize(); + return domi::FAILED; + } + (void)ge_generator.Finalize(); + return ge::SUCCESS; +} + +static Status ConvertModelToJson(int fwk_type, const string &model_file, const string &json_file) { + Status ret = domi::SUCCESS; + if (fwk_type == -1) { + ret = ge::ConvertOmModelToJson(model_file.c_str(), json_file.c_str()); + return ret; + } + + if ((fwk_type != domi::TENSORFLOW) && (fwk_type != domi::CAFFE)) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10068", {"param", "value", "supports"}, + {"framework", std::to_string(fwk_type), "only support 0(Caffe) 3(TensorFlow)"}); + GELOGE(ge::FAILED, "Input parameter[--framework] is mandatory and it's value must be: 0(Caffe) 
3(TensorFlow)."); + return ge::FAILED; + } + + // Since the Caffe model's conversion to JSON file depends on lib_caffe_parser.so, loadcustomoplib is called here. + LoadCustomOpLib(); + + if (FLAGS_dump_mode == "0") { + ret = ge::ConvertFwkModelToJson((domi::FrameworkType)fwk_type, model_file.c_str(), json_file.c_str()); + return ret; + } else if (FLAGS_dump_mode == "1") { + ret = GenerateInfershapeJson(); + return ret; + } else { + GELOGE(ge::FAILED, "Input parameter[--dump_mode]'s value must be 1 or 0."); + return ge::FAILED; + } +} + +domi::Status GenerateModel(std::map &options, std::string output) { + ge::GeGenerator ge_generator; + ge::Status geRet = ge::SUCCESS; + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + geRet = ge::GELib::Initialize(options); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("GE initialize failed!"); + return domi::FAILED; + } + } + geRet = ge_generator.Initialize(options); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("GeGenerator initialize failed!"); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + ge::Graph graph; + std::vector inputs; + if (FLAGS_framework == domi::MINDSPORE) { + // load model from file + ge::Model load_model = ge::Model("loadmodel", "version2"); + auto ret1 = load_model.LoadFromFile(FLAGS_model); + if (ret1 != ge::GRAPH_SUCCESS) { + ErrorManager::GetInstance().ATCReportErrMessage("E10056", {"parameter"}, {FLAGS_model}); + DOMI_LOGE( + "Load model from %s failed, please check model file or " + "input parameter[--framework] is correct", + FLAGS_model.c_str()); + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + graph = load_model.GetGraph(); + + GE_CHK_STATUS_EXEC(ge::InitDomiOmgContext(FLAGS_input_shape, FLAGS_input_format, "", is_dynamic_input), + GELOGE(ge::FAILED, "ATC Generate call InitDomiOmgContext ret fail"); + (void)ge_generator.Finalize(); 
(void)ge::GELib::GetInstance()->Finalize(); return domi::FAILED); + + Status ret = CreateInputsForInference(graph, inputs); + if (ret != ge::SUCCESS) { + GELOGE(ge::FAILED, "create inputs for inference failed."); + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + if (SetOutputNodeInfo(graph, "", "") != domi::SUCCESS) { + GELOGE(ge::FAILED, "Set output node info fail."); + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + } else { + std::map atc_params; + atc_params.insert(std::pair("input_shape", FLAGS_input_shape)); + atc_params.insert(std::pair("out_nodes", FLAGS_out_nodes)); + atc_params.insert(std::pair("input_format", FLAGS_input_format)); + atc_params.insert(std::pair("check_report", FLAGS_check_report)); + atc_params.insert(std::pair("input_fp16_nodes", FLAGS_input_fp16_nodes)); + atc_params.insert(std::pair("is_input_adjust_hw_layout", FLAGS_is_input_adjust_hw_layout)); + atc_params.insert(std::pair("is_output_adjust_hw_layout", FLAGS_is_output_adjust_hw_layout)); + atc_params.insert(std::pair("compress_weight_conf", FLAGS_compress_weight_conf)); + atc_params.insert(std::pair(string(ge::OUTPUT_DATATYPE), FLAGS_output_type)); + + Status ret = + ParseGraph(graph, atc_params, FLAGS_model.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework, + FLAGS_op_name_map.c_str(), FLAGS_target.c_str(), (ge::RunMode)FLAGS_mode, is_dynamic_input); + + // in ONLY_PRE_CHECK mode, pre-checking report has already saved in ParseGraph + if (FLAGS_mode == ge::ONLY_PRE_CHECK) { + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + if (ret != ge::SUCCESS) { + DOMI_LOGE("ATC precheck fail."); + return domi::FAILED; + } + return domi::SUCCESS; + } + + if (ret != ge::SUCCESS) { + DOMI_LOGE("ATC Parse graph domi::FAILED"); + DOMI_LOGE("ATC Generate execute failed"); // Duplicate log. 
(for test case + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + if (SetOutputNodeInfo(graph, FLAGS_output_type, "") != domi::SUCCESS) { + DOMI_LOGE("Set output node info fail."); + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + } + + geRet = ge_generator.GenerateOfflineModel(graph, output, inputs); + if (geRet != ge::SUCCESS) { + DOMI_LOGE("GE GenerateOfflineModel execute failed"); + DOMI_LOGE("ATC Generate execute failed"); // Duplicate log. (for test case + // checking error log) + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + (void)ge_generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return ge::SUCCESS; +} + +static void SetEnvForSingleOp(std::map &options) { + string flag_on = "1"; + string flag_off = "0"; + options.emplace(ge::GE_FE_FLAG, flag_on); + options.emplace(ge::STREAM_NUM, "1"); // single op only use one stream + options.emplace(ge::RUN_FLAG, flag_off); + options.emplace(ge::OPTION_GRAPH_RUN_MODE, flag_off); + options.emplace(ge::SINGLE_OP_FLAG, flag_on); + options.emplace(ge::PRECISION_MODE, FLAGS_precision_mode); + options.emplace(ge::SOC_VERSION, FLAGS_soc_version); + options.emplace(ge::CORE_TYPE, FLAGS_core_type); + options.emplace(ge::AICORE_NUM, FLAGS_aicore_num); + options.emplace(ge::OP_SELECT_IMPL_MODE, FLAGS_op_select_implmode); + options.emplace(ge::OPTYPELIST_FOR_IMPLMODE, FLAGS_optypelist_for_implmode); + options.emplace(ge::AUTO_TUNE_MODE, FLAGS_auto_tune_mode); + options.emplace(ge::GRAPH_MEMORY_MAX_SIZE, kGraphMemoryManagerMallocMaxSize); +} + +domi::Status GenerateSingleOp(const std::string &json_file_path) { + if (!FLAGS_output.empty() && !ge::CheckOutputPathValid(FLAGS_output)) { + DOMI_LOGE("output path %s is not valid!", FLAGS_output.c_str()); + return domi::FAILED; + } + // check optypelist_for_implmode and op_select_implmode + 
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( + ge::CheckImplmodeParamValid(FLAGS_optypelist_for_implmode, FLAGS_op_select_implmode) != ge::SUCCESS, + return ge::FAILED, "check optypelist_for_implmode and op_select_implmode failed!"); + + std::map options; + // need to be changed when ge.ini plan is done + SetEnvForSingleOp(options); + + vector build_params; + if (ge::SingleOpParser::ParseSingleOpList(json_file_path, build_params) != ge::SUCCESS) { + DOMI_LOGE("parse single op json file failed"); + return domi::FAILED; + } + + auto ret = ge::GELib::Initialize(options); + if (ret != ge::SUCCESS) { + DOMI_LOGE("GE initialize failed!"); + return domi::FAILED; + } + + ge::GeGenerator generator; + ret = generator.Initialize(options); + if (ret != SUCCESS) { + DOMI_LOGE("GeGenerator initialize failed!"); + (void)ge::GELib::GetInstance()->Finalize(); + return domi::FAILED; + } + + int index = 0; + for (auto ¶m : build_params) { + string output_path; + if (!FLAGS_output.empty()) { + output_path = FLAGS_output + "/"; + } + output_path += param.file_name; + ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path); + if (ret != SUCCESS) { + DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); + ret = domi::FAILED; + break; + } + GELOGI("Compile op success. op index = %d, output = %s", index, output_path.c_str()); + index += 1; + } + + (void)generator.Finalize(); + (void)ge::GELib::GetInstance()->Finalize(); + return ret; +} + +domi::Status GenerateOmModel() { + if (!CheckInputFormat()) { + GELOGE(ge::FAILED, "Check input_format failed"); + return domi::FAILED; + } + Status ret = GFlagUtils::CheckFlags(); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, + "Check flags failed! 
Please check whether some atc params that include semicolons[;] use double " + "quotation marks (\") to enclose each argument such as out_nodes, input_shape, dynamic_image_size"); +#if !defined(__ANDROID__) && !defined(ANDROID) + // Load custom operator Library + LoadCustomOpLib(); + + ret = ge::CheckCustomAiCpuOpLib(); + + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!"); +#endif + + const int f_stream_num = 1; + std::map options; + options.insert(std::pair(string(ge::FRAMEWORK_TYPE), to_string(FLAGS_framework))); + options.insert(std::pair(string(ge::STREAM_NUM), to_string(f_stream_num))); + options.insert(std::pair(string(ge::CALIBRATION_CONF_FILE), FLAGS_cal_conf)); + options.insert(std::pair(string(ge::ENCRYPT_MODE), to_string(FLAGS_encrypt_mode))); + options.insert(std::pair(string(ge::EK_FILE), FLAGS_encrypt_key)); + options.insert(std::pair(string(ge::CERT_FILE), FLAGS_certificate)); + options.insert(std::pair(string(ge::HW_KEY_FILE), FLAGS_hardware_key)); + options.insert(std::pair(string(ge::PRIVATE_KEY_FILE), FLAGS_private_key)); + options.insert(std::pair(string(ge::OUTPUT_NODE_NAME), FLAGS_out_nodes)); + options.insert(std::pair(string(ge::INSERT_OP_FILE), FLAGS_insert_op_conf)); + options.insert(std::pair(string(ge::PRECISION_MODE), FLAGS_precision_mode)); + + options.insert(std::pair(string(ge::RUN_FLAG), to_string(0))); + options.insert(std::pair(string(ge::TRAIN_FLAG), to_string(0))); + + if (!FLAGS_output_type.empty()) { + options.insert(std::pair(string(ge::OUTPUT_DATATYPE), FLAGS_output_type)); + } + + options.insert(std::pair(string(ge::OP_SELECT_IMPL_MODE), FLAGS_op_select_implmode)); + options.insert(std::pair(string(ge::OPTYPELIST_FOR_IMPLMODE), FLAGS_optypelist_for_implmode)); + + if (!FLAGS_input_fp16_nodes.empty()) { + GELOGI("FLAGS_input_fp16_nodes : %s .", FLAGS_input_fp16_nodes.c_str()); + options.insert(std::pair(ge::INPUT_FP16_NODES, FLAGS_input_fp16_nodes)); + } + + 
options.insert(std::pair(string(ge::HEAD_STREAM), FLAGS_head_stream)); + + options.insert(std::pair(string(ge::AUTO_TUNE_MODE), FLAGS_auto_tune_mode)); + + options.insert( + std::pair(string(ge::OPTION_EXEC_DISABLE_REUSED_MEMORY), to_string(FLAGS_disable_reuse_memory))); + + options.insert(std::pair(string(ge::SOC_VERSION), FLAGS_soc_version)); + + options.insert(std::pair(string(ge::CORE_TYPE), FLAGS_core_type)); + + options.insert(std::pair(string(ge::AICORE_NUM), FLAGS_aicore_num)); + + options.insert(std::pair(string(ge::BUFFER_OPTIMIZE), FLAGS_buffer_optimize)); + + options.insert(std::pair(string(ge::ENABLE_SMALL_CHANNEL), FLAGS_enable_small_channel)); + + options.insert(std::pair(string(ge::FUSION_SWITCH_FILE), FLAGS_fusion_switch_file)); + + options.insert(std::pair(string(ge::ENABLE_COMPRESS_WEIGHT), FLAGS_enable_compress_weight + ? ge::kEnableCompressWeightTrue + : ge::kEnableCompressWeightFalse)); + + options.insert(std::pair(string(ge::GRAPH_MEMORY_MAX_SIZE), kGraphMemoryManagerMallocMaxSize)); + + options.insert(std::pair(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream)); + + options.insert(std::pair(string(ge::QUANT_OPTIMIZE), FLAGS_quant_optimize)); + + SetDynamicBatchSizeOrImagesizeOptions(); + + if (!FLAGS_save_original_model.empty()) { + options.insert(std::pair(string(ge::SAVE_ORIGINAL_MODEL), FLAGS_save_original_model)); + options.insert(std::pair(string(ge::ORIGINAL_MODEL_FILE), FLAGS_output + "_original.om")); + } + + // print atc option map + ge::PrintOptionMap(options, "atc option"); + + // When the ATC module is transferred to a model, the suffix ".om" is automatically added to the model name + FLAGS_output = FLAGS_output + ".om"; + ret = GenerateModel(options, FLAGS_output); + if (ret != domi::SUCCESS) { + return domi::FAILED; + } + + return domi::SUCCESS; +} + +domi::Status ConvertModelToJson() { + Status ret = GFlagUtils::CheckConverJsonParamFlags(); + GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "Check 
convert json params flags failed!"); + + ret = ConvertModelToJson(FLAGS_framework, FLAGS_om, FLAGS_json); + + GE_IF_BOOL_EXEC(ret != domi::SUCCESS, return domi::FAILED); + return domi::SUCCESS; +} + +bool CheckRet(domi::Status ret, ge::Status geRet) { + if (ret != domi::SUCCESS || geRet != ge::SUCCESS) { + if (FLAGS_mode == ONLY_PRE_CHECK) { + GELOGW("ATC precheck failed."); + } else if (FLAGS_mode == GEN_OM_MODEL) { + GELOGW("ATC generate offline model failed."); + } else if (FLAGS_mode == MODEL_TO_JSON) { + GELOGW("ATC convert model to json file failed."); + } else if (FLAGS_mode == PBTXT_TO_JSON) { + GELOGW("ATC convert pbtxt to json file failed."); + } else { + return false; + } + return false; + } + + if (FLAGS_mode == ONLY_PRE_CHECK) { + GELOGI("ATC precheck success."); + } else if (FLAGS_mode == GEN_OM_MODEL) { + GELOGI("ATC generate offline model success."); + } else if (FLAGS_mode == MODEL_TO_JSON) { + GELOGI("ATC convert model to json file success."); + } else if (FLAGS_mode == PBTXT_TO_JSON) { + GELOGI("ATC convert pbtxt to json file success."); + } + return true; +} + +domi::Status ConvertPbtxtToJson() { + Status ret = GFlagUtils::CheckConverJsonParamFlags(); + if (ret != domi::SUCCESS) { + GELOGE(ge::FAILED, "Check convert json params flags failed!"); + return domi::FAILED; + } + + ret = ge::ConvertPbtxtToJson(FLAGS_om.c_str(), FLAGS_json.c_str()); + if (ret != domi::SUCCESS) { + GELOGE(ge::FAILED, "ConvertPbtxtToJson fail."); + return domi::FAILED; + } + + return domi::SUCCESS; +} + +int init(int argc, char *argv[]) { + GFlagUtils::InitGFlag(argc, argv); + // set log level + int ret = -1; + const std::set log_level = {"default", "null", "debug", "info", "warning", "error"}; + if (log_level.count(FLAGS_log) == 0) { + std::cout << "E10016: invalid value for --log:" << FLAGS_log << ", only support debug, info, warning, error, null" + << std::endl; + return ret; + } + + ret = ge::CheckLogParamValidAndSetLogLevel(FLAGS_log); + if (ret != 0) { + return ret; 
+ } + + return 0; +} + +int main(int argc, char *argv[]) { + Status ret = domi::SUCCESS; + ge::Status geRet = ge::SUCCESS; + std::cout << "ATC start working now, please wait for a moment." << std::endl; + try { + // Initialize + if (init(argc, argv) != 0) { + std::cout << "ATC run failed, Please check the detail log, Try \'atc --help\' for more information" << std::endl; + return -1; + } + + do { + if (!FLAGS_singleop.empty()) { + ret = GenerateSingleOp(FLAGS_singleop); + break; + } + + // default mode(mode:0), Open source model to model + if (GEN_OM_MODEL == FLAGS_mode || ONLY_PRE_CHECK == FLAGS_mode) { + GE_IF_BOOL_EXEC(GenerateOmModel() != domi::SUCCESS, ret = domi::FAILED; break); + } else if (MODEL_TO_JSON == FLAGS_mode) { // Mode 1, transfer model to JSON + GE_CHK_BOOL_EXEC(ConvertModelToJson() == domi::SUCCESS, ret = domi::FAILED; + break, "ATC ConvertJson execute failed!!"); + } else if (FLAGS_mode == ge::RunMode::PBTXT_TO_JSON) { + GE_CHK_BOOL_EXEC(ConvertPbtxtToJson() == domi::SUCCESS, ret = domi::FAILED; + break, "ATC convert pbtxt to json execute failed!!"); + } else { + ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"value"}, {std::to_string(FLAGS_mode)}); + DOMI_LOGE( + "Invalid value for --mode[%d], only support " + "0(model to framework model), 1(framework model to json), 3(only pre-check), " + "5(pbtxt to json)!", + FLAGS_mode); + ret = domi::FAILED; + break; + } + } while (0); + } catch (std::bad_alloc) { + ret = domi::FAILED; + DOMI_LOGE("ATC run failed, bad memory allocation occur !"); + std::cout << "ATC run failed, bad memory allocation occur !" << std::endl; + } catch (...) { + ret = domi::FAILED; + DOMI_LOGE("ATC run failed, some exceptions occur !"); + std::cout << "ATC run failed, some exceptions occur !" 
<< std::endl; + } + + if (!CheckRet(ret, geRet)) { + std::cout << "ATC run failed, Please check the detail log, Try \'atc --help\' for more information" << std::endl; + return ret; + } else { + std::cout << "ATC run success, welcome to the next use." << std::endl; + return 0; + } +} diff --git a/src/ge/offline/module.mk b/src/ge/offline/module.mk new file mode 100644 index 00000000..c97e7813 --- /dev/null +++ b/src/ge/offline/module.mk @@ -0,0 +1,53 @@ + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := atc + +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 + +LOCAL_SRC_FILES := \ + main.cc \ + single_op_parser.cc \ + ../session/omg.cc \ + ../ir_build/atc_ir_common.cc \ + +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH)/../ ./ \ + $(TOPDIR)inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/external/graph \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/framework/domi \ + $(TOPDIR)libc_sec/include \ + $(TOPDIR)inc/common/util \ + third_party/json/include \ + third_party/gflags/include \ + third_party/protobuf/include \ + proto/om.proto \ + proto/ge_ir.proto \ + proto/task.proto \ + proto/insert_op.proto \ + +LOCAL_SHARED_LIBRARIES := \ + libc_sec \ + libge_common \ + libprotobuf \ + libslog \ + libgraph \ + libregister \ + liberror_manager \ + libge_compiler \ + libruntime_compile \ + libparser_common \ + libfmk_tensorflow_parser \ + liberror_manager \ + +LOCAL_STATIC_LIBRARIES := libgflags + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_EXECUTABLE) + diff --git a/src/ge/offline/single_op_parser.cc b/src/ge/offline/single_op_parser.cc new file mode 100644 index 00000000..067d39e2 --- /dev/null +++ b/src/ge/offline/single_op_parser.cc @@ -0,0 +1,354 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "single_op_parser.h" + +#include +#include +#include +#include + +#include + +#include "framework/common/debug/ge_log.h" +#include "common/util/error_manager/error_manager.h" +#include "common/ge_inner_error_codes.h" +#include "framework/common/util.h" +#include "graph/utils/tensor_utils.h" + +using Json = nlohmann::json; +using std::map; +using std::string; +using std::vector; + +namespace ge { +namespace { +constexpr char const *kKeyOp = "op"; +constexpr char const *kKeyInputDesc = "input_desc"; +constexpr char const *kKeyOutputDesc = "output_desc"; +constexpr char const *kKeyAttr = "attr"; +constexpr char const *kKeyName = "name"; +constexpr char const *kKeyType = "type"; +constexpr char const *kKeyShape = "shape"; +constexpr char const *kKeyValue = "value"; +constexpr char const *kKeyFormat = "format"; +constexpr char const *kFileSuffix = ".om"; +constexpr int kDumpJsonIndent = 2; + +map kAttrTypeDict = { + {"bool", GeAttrValue::VT_BOOL}, + {"int", GeAttrValue::VT_INT}, + {"float", GeAttrValue::VT_FLOAT}, + {"string", GeAttrValue::VT_STRING}, + {"list_bool", GeAttrValue::VT_LIST_BOOL}, + {"list_int", GeAttrValue::VT_LIST_INT}, + {"list_float", GeAttrValue::VT_LIST_FLOAT}, + {"list_string", GeAttrValue::VT_LIST_STRING}, + {"list_list_int", GeAttrValue::VT_LIST_LIST_INT}, +}; + +map kDataTypeDict = { + {"bool", DT_BOOL}, {"int8", DT_INT8}, {"uint8", DT_UINT8}, {"int16", DT_INT16}, {"uint16", DT_UINT16}, + {"int32", DT_INT32}, {"uint32", DT_UINT32}, {"int64", DT_INT64}, {"uint64", DT_UINT64}, {"float16", DT_FLOAT16}, + {"half", 
/**
 * Case-insensitive lookup in a string-keyed dictionary.
 *
 * `key` is lower-cased IN PLACE before the lookup, so the caller's string is modified.
 * The dictionary itself is expected to hold lower-case keys.
 *
 * @param dict         table to search
 * @param key          key to look up; converted to lower case as a side effect
 * @param default_val  value returned when `key` is not present
 * @return the mapped value, or `default_val` when no entry matches
 */
template <typename T>
T GetValue(const std::map<std::string, T> &dict, std::string &key, T default_val) {
  // ::tolower is only defined for EOF and values representable as unsigned char; passing a
  // negative plain char (any byte >= 0x80 where char is signed) is undefined behaviour.
  std::transform(key.begin(), key.end(), key.begin(),
                 [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
  const auto it = dict.find(key);
  if (it == dict.end()) {
    return default_val;
  }

  return it->second;
}
Unsupported type: %s", attr.name.c_str(), attr.type.c_str()); + return; + } + + switch (it->second) { + case GeAttrValue::VT_BOOL: + SetAttrValue(j, attr); + break; + case GeAttrValue::VT_INT: + SetAttrValue(j, attr); + break; + case GeAttrValue::VT_FLOAT: + SetAttrValue(j, attr); + break; + case GeAttrValue::VT_STRING: + SetAttrValue(j, attr); + break; + case GeAttrValue::VT_LIST_BOOL: + SetAttrValue>(j, attr); + break; + case GeAttrValue::VT_LIST_INT: + SetAttrValue>(j, attr); + break; + case GeAttrValue::VT_LIST_FLOAT: + SetAttrValue>(j, attr); + break; + case GeAttrValue::VT_LIST_STRING: + SetAttrValue>(j, attr); + break; + case GeAttrValue::VT_LIST_LIST_INT: + SetAttrValue>>(j, attr); + break; + default: + GELOGE(UNSUPPORTED, "Parse attr[%s] failed. Unsupported type: %s", attr.name.c_str(), attr.type.c_str()); + break; + } +} + +void from_json(const Json &j, SingleOpDesc &desc) { + desc.op = j.at(kKeyOp).get(); + + auto input_desc = j.find(kKeyInputDesc); + if (input_desc != j.end()) { + desc.input_desc = input_desc->get>(); + } + + auto output_desc = j.find(kKeyOutputDesc); + if (output_desc != j.end()) { + desc.output_desc = output_desc->get>(); + } + + auto attr_field = j.find(kKeyAttr); + if (attr_field != j.end()) { + desc.attrs = attr_field->get>(); + } +} + +Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { + std::string real_path = RealPath(file.c_str()); + if (real_path.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10023", {"value"}, {file}); + GELOGE(FAILED, "Input parameter[--singleop]'s value[%s] is not a valid path.", file.c_str()); + return INTERNAL_ERROR; + } + + std::ifstream ifs(real_path); + if (!ifs.is_open()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10024", {"value"}, {file}); + GELOGE(FAILED, "Open file[%s] provided in input parameter[--singleop] failed.", file.c_str()); + return FAILED; + } + try { + ifs >> json_obj; + } catch (const std::exception &e) { + 
ErrorManager::GetInstance().ATCReportErrMessage("E10025", {"realpath", "errmsg"}, {real_path, e.what()}); + GELOGE(PARAM_INVALID, "Parse file[%s] provided in input parameter[--singleop] failed, exception = %s.", + real_path.c_str(), e.what()); + return PARAM_INVALID; + } + + ifs.close(); + return SUCCESS; +} + +bool SingleOpParser::Validate(const SingleOpDesc &op_desc) { + if (op_desc.op.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10026"); + GELOGE(PARAM_INVALID, "Op name is empty"); + return false; + } + + int index = 0; + for (auto &tensor_desc : op_desc.input_desc) { + if (tensor_desc.type == DT_UNDEFINED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "index"}, {"input", std::to_string(index)}); + GELOGE(false, "Input index[%d]'s dataType is invalid", index); + return false; + } + + if (tensor_desc.format == FORMAT_RESERVED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10028", {"input", "index"}, {"input", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Input index[%d]'s format is invalid", index); + return false; + } + ++index; + } + + index = 0; + for (auto &tensor_desc : op_desc.output_desc) { + if (tensor_desc.type == DT_UNDEFINED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "index"}, {"output", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Output[%d] dataType is invalid", index); + return false; + } + + if (tensor_desc.format == FORMAT_RESERVED) { + ErrorManager::GetInstance().ATCReportErrMessage("E10028", {"input", "index"}, {"output", std::to_string(index)}); + GELOGE(PARAM_INVALID, "Output[%d] format is invalid", index); + return false; + } + ++index; + } + + for (auto &attr : op_desc.attrs) { + if (attr.name.empty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10029"); + GELOGE(PARAM_INVALID, "attr name is empty"); + return false; + } + + if (attr.value.IsEmpty()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10030", {"attrname"}, {attr.name}); + 
GELOGE(PARAM_INVALID, "Parse attr \"%s\" failed. ", attr.name.c_str()); + return false; + } + } + + return true; +} + +OpDesc *SingleOpParser::CreateOpDesc(const string &op_type) { return new (std::nothrow) OpDesc(op_type, op_type); } + +Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single_op_desc, + SingleOpBuildParam &build_param) { + auto *op_desc = CreateOpDesc(single_op_desc.op); + if (op_desc == nullptr) { + GELOGE(MEMALLOC_FAILED, "Failed to create instance of opDesc"); + return MEMALLOC_FAILED; + } + + std::stringstream file_name; + file_name << index; + file_name << "_" << single_op_desc.op; + for (auto &desc : single_op_desc.input_desc) { + file_name << "_" << desc.type << "_" << desc.format; + for (auto dim : desc.dims) { + file_name << "_" << dim; + } + GeTensorDesc ge_tensor_desc(GeShape(desc.dims), desc.format, desc.type); + ge_tensor_desc.SetOriginFormat(desc.format); + TensorUtils::SetRealDimCnt(ge_tensor_desc, desc.dims.size()); + TensorUtils::SetInputTensor(ge_tensor_desc, true); + TensorUtils::SetOutputTensor(ge_tensor_desc, false); + if (desc.name.empty()) { + op_desc->AddInputDesc(ge_tensor_desc); + } else { + op_desc->AddInputDesc(desc.name, ge_tensor_desc); + } + if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) { + ge_tensor_desc.SetFormat(FORMAT_ND); + ge_tensor_desc.SetOriginFormat(FORMAT_ND); + } + build_param.inputs.emplace_back(ge_tensor_desc); + } + + for (auto &desc : single_op_desc.output_desc) { + file_name << "_" << desc.type << "_" << desc.format; + for (auto dim : desc.dims) { + file_name << "_" << dim; + } + + GeTensorDesc ge_tensor_desc(GeShape(desc.dims), desc.format, desc.type); + ge_tensor_desc.SetOriginFormat(desc.format); + TensorUtils::SetRealDimCnt(ge_tensor_desc, desc.dims.size()); + TensorUtils::SetInputTensor(ge_tensor_desc, false); + TensorUtils::SetOutputTensor(ge_tensor_desc, true); + op_desc->AddOutputDesc(ge_tensor_desc); + if (desc.format == FORMAT_FRACTAL_NZ || 
desc.format == FORMAT_FRACTAL_Z) { + ge_tensor_desc.SetFormat(FORMAT_ND); + ge_tensor_desc.SetOriginFormat(FORMAT_ND); + } + build_param.outputs.emplace_back(ge_tensor_desc); + } + + for (const auto &attr : single_op_desc.attrs) { + op_desc->SetAttr(attr.name, attr.value); + } + + file_name << kFileSuffix; + build_param.file_name = file_name.str(); + + build_param.op_desc.reset(op_desc); + return SUCCESS; +} + +Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector &op_list) { + Json single_op_list_json; + auto ret = ReadJsonFile(file, single_op_list_json); + if (ret != SUCCESS) { + return ret; + } + + int index = 0; + for (const Json &single_op_json : single_op_list_json) { + GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str()); + SingleOpDesc single_op_desc; + try { + single_op_desc = single_op_json; + } catch (const nlohmann::json::exception &e) { + ErrorManager::GetInstance().ATCReportErrMessage( + "E10045", {"index", "jsonfile", "exception", "jsonStr"}, + {std::to_string(index), file, e.what(), single_op_json.dump(kDumpJsonIndent)}); + GELOGE(PARAM_INVALID, "Parse op[%d] failed when read json file[%s], exception[%s], jsonStr[%s]", index, + file.c_str(), e.what(), single_op_json.dump(kDumpJsonIndent).c_str()); + return PARAM_INVALID; + } + + if (!Validate(single_op_desc)) { + ErrorManager::GetInstance().ATCReportErrMessage("E10046", {"index", "jsonfile"}, {std::to_string(index), file}); + GELOGE(PARAM_INVALID, "Validate op[%d] failed when read json file[%s].", index, file.c_str()); + return PARAM_INVALID; + } + + SingleOpBuildParam param; + ret = ConvertToBuildParam(index, single_op_desc, param); + if (ret != SUCCESS) { + return ret; + } + + op_list.emplace_back(param); + GELOGI("Parse op[%d] success", index); + index += 1; + } + + return SUCCESS; +} +} // namespace ge diff --git a/src/ge/offline/single_op_parser.h b/src/ge/offline/single_op_parser.h new file mode 100644 index 00000000..13c2e565 --- 
/dev/null +++ b/src/ge/offline/single_op_parser.h @@ -0,0 +1,76 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ACL_TOOLS_COMPILE_PARSER_H +#define ACL_TOOLS_COMPILE_PARSER_H + +#include +#include + +#include + +#include "ge/ge_api_error_codes.h" +#include "graph/types.h" +#include "graph/ge_attr_value.h" +#include "graph/op_desc.h" + +namespace ge { +struct SingleOpTensorDesc { + std::string name; + std::vector dims; + ge::Format format = ge::FORMAT_RESERVED; + ge::DataType type = ge::DT_UNDEFINED; +}; + +struct SingleOpAttr { + std::string name; + std::string type; + ge::GeAttrValue value; +}; + +struct SingleOpDesc { + std::string op; + std::vector input_desc; + std::vector output_desc; + std::vector attrs; +}; + +struct SingleOpBuildParam { + ge::OpDescPtr op_desc; + std::vector inputs; + std::vector outputs; + std::string file_name; +}; + +void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); + +void from_json(const nlohmann::json &json, SingleOpAttr &desc); + +void from_json(const nlohmann::json &json, SingleOpDesc &desc); + +class SingleOpParser { + public: + static Status ParseSingleOpList(const std::string &file, std::vector &op_list); + + private: + static Status ReadJsonFile(const std::string &file, nlohmann::json &json_obj); + static bool Validate(const SingleOpDesc &op_desc); + static OpDesc *CreateOpDesc(const std::string &op_type); + static Status 
ConvertToBuildParam(int index, const SingleOpDesc &single_op_desc, SingleOpBuildParam &build_param); +}; +} // namespace ge + +#endif // ACL_TOOLS_COMPILE_PARSER_H diff --git a/src/ge/plugin/engine/module.mk b/src/ge/plugin/engine/module.mk new file mode 100644 index 00000000..170cfc68 --- /dev/null +++ b/src/ge/plugin/engine/module.mk @@ -0,0 +1,59 @@ + +LOCAL_PATH := $(call my-dir) + +COMMON_LOCAL_SRC_FILES := \ + dnnengines.cc \ + engine_manage.cc \ + + +COMMON_LOCAL_C_INCLUDES := \ + $(LOCAL_PATH) \ + $(LOCAL_PATH)/../ \ + $(LOCAL_PATH)/../../ \ + $(TOPDIR)inc \ + $(TOPDIR)inc/external \ + $(TOPDIR)inc/framework \ + $(TOPDIR)inc/framework/common \ + +#compiler for host libengine +include $(CLEAR_VARS) + +LOCAL_SHARED_LIBRARIES := \ + libslog + +LOCAL_MODULE := libengine +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 +ifeq ($(DEBUG), 1) +LOCAL_CFLAGS += -g -O0 +endif + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_HOST_SHARED_LIBRARY) + + +#compiler for device libengine +include $(CLEAR_VARS) + +LOCAL_SHARED_LIBRARIES := \ + libslog + +LOCAL_MODULE := libengine +LOCAL_CFLAGS += -Werror +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 +ifeq ($(DEBUG), 1) +LOCAL_CFLAGS += -g -O0 +endif + +LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) + +LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) + +LOCAL_LDFLAGS := -lrt -ldl + +include $(BUILD_SHARED_LIBRARY) \ No newline at end of file diff --git a/src/ge/session/inner_session.cc b/src/ge/session/inner_session.cc index 74a43d96..74495e82 100644 --- a/src/ge/session/inner_session.cc +++ b/src/ge/session/inner_session.cc @@ -180,6 +180,30 @@ Status InnerSession::RegisterCallBackFunc( return SUCCESS; } +Status InnerSession::BuildGraph(uint32_t graph_id, const std::vector &inputs) { + UpdateThreadContext(graph_id); + GELOGI("[InnerSession:%lu] build graph on session, 
graph_id=%u.", session_id_, graph_id); + std::vector ge_inputs; + for (auto const &input : inputs) { + std::vector input_dims; + std::transform(input.dims.begin(), input.dims.end(), std::back_inserter(input_dims), + [](int64_t x) -> int64_t { return x; }); + GeShape input_shape(input_dims); + GeTensorDesc input_tensor_desc; + input_tensor_desc.SetShape(input_shape); + input_tensor_desc.SetDataType(static_cast(input.data_type)); + ge_inputs.emplace_back(input_tensor_desc); + } + GeRootModelPtr ge_root_model = nullptr; + Status ret = graph_manager_.BuildGraph(graph_id, ge_inputs, ge_root_model, session_id_, true); + if (ret != SUCCESS) { + GELOGE(ret, "[InnerSession:%lu] build graph failed, graph_id=%u.", session_id_, graph_id); + return ret; + } + GELOGI("[InnerSession:%lu] build graph success, graph_id=%u.", session_id_, graph_id); + return ret; +} + Status InnerSession::RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback) { UpdateThreadContext(graph_id); diff --git a/src/ge/session/inner_session.h b/src/ge/session/inner_session.h index 3b009a44..bcc47354 100644 --- a/src/ge/session/inner_session.h +++ b/src/ge/session/inner_session.h @@ -41,6 +41,8 @@ class InnerSession { Status RemoveGraph(uint32_t graph_id); + Status BuildGraph(uint32_t graph_id, const std::vector &inputs); + Status RunGraphAsync(uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback); Status Finalize(); diff --git a/src/ge/session/omg.cc b/src/ge/session/omg.cc new file mode 100644 index 00000000..8fe31624 --- /dev/null +++ b/src/ge/session/omg.cc @@ -0,0 +1,909 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "omg/omg.h" +#include +#include +#include +#include "common/auth/file_saver.h" +#include "common/convert/pb2json.h" +#include "common/debug/log.h" +#include "common/debug/memory_dumper.h" +#include "common/model_parser/base.h" +#include "common/model_saver.h" +#include "common/properties_manager.h" +#include "common/string_util.h" +#include "common/types.h" +#include "common/util.h" +#include "common/util/error_manager/error_manager.h" +#include "framework/common/debug/ge_log.h" +#include "framework/omg/parser/parser_inner_ctx.h" +#include "google/protobuf/io/zero_copy_stream_impl.h" +#include "graph/compute_graph.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/optimize/common/params.h" +#include "graph/utils/type_utils.h" +#include "ir_build/atc_ir_common.h" +#include "omg/omg_inner_types.h" +#include "omg/parser/model_parser.h" +#include "omg/parser/parser_factory.h" +#include "omg/parser/weights_parser.h" +#include "parser/common/pre_checker.h" +#include "proto/ge_ir.pb.h" +#include "register/op_registry.h" + +using nlohmann::json; +using ProcParam = struct PROC_PARAM; +using domi::ModelParserFactory; +using domi::WeightsParserFactory; +using std::ostringstream; + +namespace google { +namespace protobuf { +namespace io { +class FileOutputStream; +} +} // namespace protobuf +} // namespace google +namespace ge { +namespace { +const std::string kGraphDefaultName = "domi_default"; +const std::string kScopeIdAttr = "fusion_scope"; +} // namespace + +// When the model is converted 
to a JSON file, the following operator attributes in the blacklist will be ignored +const std::set kOmBlackFields = {"output", "data_offset", "data", "workspace", "workspace_bytes", + "memory_size", "weight_size", "size", "bt", "quantize_factor"}; + +static std::map output_type_str_to_datatype = { + {"FP32", ge::DT_FLOAT}, {"FP16", ge::DT_FLOAT16}, {"UINT8", ge::DT_UINT8}}; + +static bool CheckInputTrueOrFalse(const std::string &s, const std::string &atc_param) { + if ((s == "true") || (s == "false")) { + return true; + } else { + ErrorManager::GetInstance().ATCReportErrMessage("E10033", {"parameter", "value"}, {atc_param, s}); + GELOGE(PARAM_INVALID, "Input parameter[--%s]'s value[%s] must be true or false.", atc_param.c_str(), s.c_str()); + return false; + } +} + +static void ParseAtcParms(const std::map &atc_params, const std::string &key, + std::string ¶m) { + auto iter = atc_params.find(key); + if (iter != atc_params.end()) { + param = iter->second; + } +} + +static Status CheckInputShapeNode(const ComputeGraphPtr &graph) { + for (auto it : domi::GetContext().user_input_dims) { + std::string node_name = it.first; + ge::NodePtr node = graph->FindNode(node_name); + if (node == nullptr) { + ErrorManager::GetInstance().ATCReportErrMessage("E10034", {"parameter", "opname"}, {"input_shape", node_name}); + GELOGE(PARAM_INVALID, "Input parameter[--input_shape]'s opname[%s] is not exist in model", node_name.c_str()); + return PARAM_INVALID; + } + if (node->GetType() != DATA) { + ErrorManager::GetInstance().ATCReportErrMessage("E10035", {"parameter", "opname"}, {"input_shape", node_name}); + GELOGE(PARAM_INVALID, "Input parameter[--input_shape]'s opname[%s] is not a input opname", node_name.c_str()); + return PARAM_INVALID; + } + } + return SUCCESS; +} + +static Status CheckInputFp16Nodes(const ComputeGraphPtr &graph, const string &input_fp16_nodes, + const string &is_input_adjust_hw_layout) { + GE_CHECK_NOTNULL(graph); + vector adjust_fp16_format_vec; + if 
(!is_input_adjust_hw_layout.empty()) { + adjust_fp16_format_vec = StringUtils::Split(is_input_adjust_hw_layout, ','); + for (auto &s : adjust_fp16_format_vec) { + StringUtils::Trim(s); + if (!CheckInputTrueOrFalse(s, "is_input_adjust_hw_layout")) { + GELOGE(PARAM_INVALID, "Invalid Param, is_input_adjust_hw_layout only support true/false: but is [%s]", + is_input_adjust_hw_layout.c_str()); + return PARAM_INVALID; + } + } + } + if (input_fp16_nodes.empty()) { + return SUCCESS; + } + GELOGI("The input_fp16_nodes is set %s", input_fp16_nodes.c_str()); + vector input_fp16_nodes_vec = StringUtils::Split(input_fp16_nodes, ';'); + for (uint32_t i = 0; i < input_fp16_nodes_vec.size(); ++i) { + ge::NodePtr node = graph->FindNode(input_fp16_nodes_vec[i]); + if (node == nullptr) { + ErrorManager::GetInstance().ATCReportErrMessage("E10034", {"parameter", "opname"}, + {"input_fp16_nodes", input_fp16_nodes_vec[i]}); + GELOGE(PARAM_INVALID, "Can not find node [%s] in graph, please check input_fp16_nodes param", + input_fp16_nodes_vec[i].c_str()); + return PARAM_INVALID; + } + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (op_desc->GetType() != DATA) { + ErrorManager::GetInstance().ATCReportErrMessage("E10035", {"parameter", "opname"}, + {"input_fp16_nodes", input_fp16_nodes_vec[i]}); + GELOGE(PARAM_INVALID, "input_fp16_nodes: %s is not a input node name", input_fp16_nodes_vec[i].c_str()); + return PARAM_INVALID; + } + if (ge::AttrUtils::SetBool(op_desc, "input_fp16", true)) { + if ((i < adjust_fp16_format_vec.size()) && (adjust_fp16_format_vec[i] == "true")) { + GELOGI("This node [%s] should be set NC1HWC0", input_fp16_nodes_vec[i].c_str()); + if (!ge::AttrUtils::SetBool(op_desc, "input_set_nc1hwc0", true)) { + GELOGW("This node [%s] set NC1HWC0 failed", input_fp16_nodes_vec[i].c_str()); + } + } + } + } + return SUCCESS; +} + +static Status SetWeightCompressNodes(const ComputeGraphPtr &graph, const string &compress_weight_conf) { + GE_CHECK_NOTNULL(graph); + 
if (compress_weight_conf.empty()) { + return SUCCESS; + } + std::string real_path = RealPath(compress_weight_conf.c_str()); + if (real_path.empty()) { + GELOGE(PARAM_INVALID, "Can not get real path for %s.", compress_weight_conf.c_str()); + return PARAM_INVALID; + } + std::ifstream ifs(real_path); + if (!ifs.is_open()) { + GELOGE(domi::FAILED, "Open file %s failed", compress_weight_conf.c_str()); + return domi::FAILED; + } + + std::string compress_nodes; + ifs >> compress_nodes; + ifs.close(); + GELOGI("Compress weight of nodes: %s", compress_nodes.c_str()); + + vector compress_node_vec = StringUtils::Split(compress_nodes, ';'); + for (size_t i = 0; i < compress_node_vec.size(); ++i) { + ge::NodePtr node = graph->FindNode(compress_node_vec[i]); + if (node == nullptr) { + GELOGW("node %s is not in graph", compress_node_vec[i].c_str()); + continue; + } + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (!ge::AttrUtils::SetBool(op_desc, ge::ATTR_NAME_COMPRESS_WEIGHT, true)) { + GELOGE(domi::FAILED, "node %s SetBool failed.", compress_node_vec[i].c_str()); + return domi::FAILED; + } + } + return SUCCESS; +} + +static Status ParseOutputFp16NodesFormat(const string &is_output_fp16) { + if (is_output_fp16.empty()) { + return SUCCESS; + } + + vector &output_formats = domi::GetContext().output_formats; + output_formats.clear(); + vector node_format_vec = StringUtils::Split(is_output_fp16, ','); + for (auto &is_fp16 : node_format_vec) { + StringUtils::Trim(is_fp16); + if (!CheckInputTrueOrFalse(is_fp16, "is_output_adjust_hw_layout")) { + GELOGE(PARAM_INVALID, "Invalid Param, is_output_adjust_hw_layout only support true/false: but is [%s]", + is_output_fp16.c_str()); + return PARAM_INVALID; + } + if (is_fp16 == "false") { + output_formats.push_back(DOMI_TENSOR_ND); + } else if (is_fp16 == "true") { + output_formats.push_back(domi::DOMI_TENSOR_NC1HWC0); + } + } + return SUCCESS; +} + +void FindParserSo(const string &path, vector &file_list, string 
&caffe_parser_path) { + // path, Change to absolute path + string real_path = RealPath(path.c_str()); + if (real_path.empty()) { // plugin path does not exist + return; + } + + struct dirent *dent(nullptr); + DIR *dir = opendir(real_path.c_str()); + + if (nullptr == dir) { // plugin path does not exist + GELOGW("Open directory %s failed.", path.c_str()); + return; + } + + while ((dent = readdir(dir)) != nullptr) { + if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue; + string name = dent->d_name; + string full_name = real_path + "/" + name; + const string so_suff = ".so"; + const string caffe_parser_so_suff = "lib_caffe_parser.so"; + const string aicpu_so_suff = "_aicpu.so"; + const string aicpu_host_so_suff = "_online.so"; + if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { + if (full_name.size() >= caffe_parser_so_suff.size() && + full_name.compare(full_name.size() - caffe_parser_so_suff.size(), caffe_parser_so_suff.size(), + caffe_parser_so_suff) == 0) { + caffe_parser_path = full_name; + } else if ((full_name.size() >= aicpu_so_suff.size() && + full_name.compare(full_name.size() - aicpu_so_suff.size(), aicpu_so_suff.size(), aicpu_so_suff) == + 0) || + (full_name.size() >= aicpu_host_so_suff.size() && + full_name.compare(full_name.size() - aicpu_host_so_suff.size(), aicpu_host_so_suff.size(), + aicpu_host_so_suff) == 0)) { + // aicpu so, Put the file path into the omgcontext and save into the model in the builder stage; + domi::GetContext().aicpu_op_run_paths.push_back(full_name); + } else { // save parser so path into file_list vector + file_list.push_back(full_name); + } + continue; + } + + FindParserSo(full_name, file_list, caffe_parser_path); + } + closedir(dir); + return; +} + +Status CheckCustomAiCpuOpLib() { + std::vector vec_op_type; + domi::OpRegistry::Instance()->GetOpTypeByImplyType(vec_op_type, domi::ImplyType::CUSTOM); + for (uint32_t i = 0; i < 
vec_op_type.size(); i++) { + bool aicpu_so_exist = false; + std::string ai_cpu_so_name = "lib" + vec_op_type[i] + "_aicpu.so"; + for (uint32_t j = 0; j < domi::GetContext().aicpu_op_run_paths.size(); j++) { + string bin_file_path = domi::GetContext().aicpu_op_run_paths[j]; + if (bin_file_path.size() >= ai_cpu_so_name.size() && + bin_file_path.compare(bin_file_path.size() - ai_cpu_so_name.size(), ai_cpu_so_name.size(), ai_cpu_so_name) == + 0) { + aicpu_so_exist = true; + break; + } + } + if (!aicpu_so_exist) { + GELOGE(domi::FAILED, "cant find aicpu run so(%s), please check the plugin path!", ai_cpu_so_name.c_str()); + return domi::FAILED; + } + } + return domi::SUCCESS; +} + +Status SetOutFormatAndDataTypeAttr(ge::OpDescPtr op_desc, const ge::Format format, const ge::DataType data_type) { + if (op_desc == nullptr) { + GELOGE(domi::FAILED, "Input op desc invalid."); + return domi::FAILED; + } + (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_NET_OUTPUT_FORMAT, format); + (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_NET_OUTPUT_DATATYPE, data_type); + return domi::SUCCESS; +} + +Status StringToInt(std::string &str, int32_t &value) { + try { + value = stoi(str); + } catch (std::invalid_argument &) { + GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", str.c_str()); + return PARAM_INVALID; + } catch (std::out_of_range &) { + GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", str.c_str()); + return PARAM_INVALID; + } + return SUCCESS; +} + +Status VerifyOutputTypeAndOutNodes(std::vector &out_type_vec) { + std::vector> user_out_nodes = domi::GetContext().user_out_nodes; + std::set out_nodes_info; + for (uint32_t i = 0; i < user_out_nodes.size(); ++i) { + // out_nodes set should include output_type and output_format + std::string tmp = user_out_nodes[i].first + ":" + to_string(user_out_nodes[i].second); + out_nodes_info.emplace(tmp); + } + for (uint32_t i = 0; i < out_type_vec.size(); ++i) { + if (out_nodes_info.find(out_type_vec[i]) == out_nodes_info.end()) { + 
ErrorManager::GetInstance().ATCReportErrMessage("E10059", {"value"}, {out_type_vec[i]}); + GELOGE(domi::FAILED, "Can not find this node (%s) in out_nodes.", out_type_vec[i].c_str()); + return domi::FAILED; + } + } + return domi::SUCCESS; +} + +Status ParseOutputType(const std::string &output_type, std::map> &out_type_index_map, + std::map> &out_type_dt_map) { + if (output_type.find(':') == std::string::npos) { + GELOGI("output_type is not multiple nodes, means all out nodes"); + auto it = output_type_str_to_datatype.find(output_type); + if (it == output_type_str_to_datatype.end()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {output_type}); + GELOGE(ge::PARAM_INVALID, "Invalid value for --output_type[%s], only support DT_FLOAT, DT_FLOAT16, DT_UINT8!!", + output_type.c_str()); + return domi::FAILED; + } + return domi::SUCCESS; + } + std::vector out_type_vec; + vector nodes_v = StringUtils::Split(output_type, ';'); + for (const string &node : nodes_v) { + vector node_index_type_v = StringUtils::Split(node, ':'); + if (node_index_type_v.size() != 3) { // The size must be 3. 
+ ErrorManager::GetInstance().ATCReportErrMessage("E10058", {"value"}, {node}); + GELOGE(PARAM_INVALID, + "The param of output_type is invalid, the correct format is [opname:index:dtype]," + "while the actual input is %s.", + node.c_str()); + return domi::FAILED; + } + ge::DataType tmp_dt; + std::string node_name = StringUtils::Trim(node_index_type_v[0]); + std::string index_str = StringUtils::Trim(node_index_type_v[1]); + int32_t index; + if (StringToInt(index_str, index) != SUCCESS) { + return domi::FAILED; + } + std::string dt_value = StringUtils::Trim(node_index_type_v[2]); + auto it = output_type_str_to_datatype.find(dt_value); + if (it == output_type_str_to_datatype.end()) { + ErrorManager::GetInstance().ATCReportErrMessage("E10042", {"value"}, {dt_value}); + GELOGE(ge::PARAM_INVALID, "output_type [%s] is invalid.", dt_value.c_str()); + return domi::FAILED; + } else { + tmp_dt = it->second; + } + out_type_vec.push_back(node_name + ":" + index_str); + auto it_index = out_type_index_map.find(node_name); + if (it_index == out_type_index_map.end()) { + vector tmp_vec; + tmp_vec.push_back(index); + out_type_index_map.emplace(node_name, tmp_vec); + } else { + it_index->second.push_back(index); + } + + auto it_dt = out_type_dt_map.find(node_name); + if (it_dt == out_type_dt_map.end()) { + vector tmp_vec; + tmp_vec.push_back(tmp_dt); + out_type_dt_map.emplace(node_name, tmp_vec); + } else { + it_dt->second.push_back(tmp_dt); + } + } + return VerifyOutputTypeAndOutNodes(out_type_vec); +} + +Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output) { + ge::ComputeGraphPtr compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + + std::vector> user_out_nodes = domi::GetContext().user_out_nodes; + std::vector output_formats = domi::GetContext().output_formats; + std::vector> output_nodes_info; + std::vector output_nodes_name; + + std::map> out_type_index_map; + std::map> out_type_dt_map; + if 
(!output_type.empty()) { + if (ParseOutputType(output_type, out_type_index_map, out_type_dt_map) != SUCCESS) { + GELOGE(domi::FAILED, "Parse output_type failed."); + return domi::FAILED; + } + } + + // User declared outputs + for (uint32_t i = 0; i < user_out_nodes.size(); ++i) { + ge::NodePtr out_node = compute_graph->FindNode(user_out_nodes[i].first); + if (out_node == nullptr) { + GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str()); + return domi::FAILED; + } + if (out_node->GetType() == DATA) { + GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", user_out_nodes[i].first.c_str()); + return domi::FAILED; + } + auto op_desc = out_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (i < output_formats.size()) { + if (output_formats[i] == domi::DOMI_TENSOR_NC1HWC0) { + GELOGI("The output node [%s] should be set NC1HWC0", user_out_nodes[i].first.c_str()); + if (!ge::AttrUtils::SetBool(op_desc, "output_set_fp16_nc1hwc0", true)) { + GELOGW("The output node [%s] set NC1HWC0 failed", user_out_nodes[i].first.c_str()); + } + } + } + auto it_index = out_type_index_map.find(user_out_nodes[i].first); + auto it_dt = out_type_dt_map.find(user_out_nodes[i].first); + if ((it_index != out_type_index_map.end()) && (it_dt != out_type_dt_map.end())) { + GELOGI("The output node [%s] need to be set output_type", user_out_nodes[i].first.c_str()); + (void)ge::AttrUtils::SetListDataType(op_desc, "_output_dt_list", it_dt->second); + (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second); + } + output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second)); + output_nodes_name.push_back(out_node->GetName()); + } + // default output node (leaf) + if (user_out_nodes.empty()) { + for (ge::NodePtr node : compute_graph->GetDirectNode()) { + if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) { + Status ret = GetOutputLeaf(node, output_nodes_info, output_nodes_name); 
+ GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail."); + } + } + } + compute_graph->SetGraphOutNodesInfo(output_nodes_info); + domi::GetContext().net_out_nodes = output_nodes_name; + return domi::SUCCESS; +} + +Status GetOutputLeaf(NodePtr node, std::vector> &output_nodes_info, + std::vector &output_nodes_name) { + ge::OpDescPtr tmpDescPtr = node->GetOpDesc(); + if (tmpDescPtr == nullptr) { + GELOGE(domi::FAILED, "Get outnode op desc fail."); + return domi::FAILED; + } + size_t size = tmpDescPtr->GetOutputsSize(); + if (node->GetType() != NETOUTPUT) { + for (size_t index = 0; index < size; ++index) { + output_nodes_info.push_back(std::make_pair(node, index)); + output_nodes_name.push_back(node->GetName()); + } + } else { + const auto in_anchors = node->GetAllInDataAnchors(); + for (auto in_anchor : in_anchors) { + auto out_anchor = in_anchor->GetPeerOutAnchor(); + if (out_anchor == nullptr) { + GELOGE(domi::FAILED, "Get leaf node op desc fail."); + return domi::FAILED; + } + auto out_node = out_anchor->GetOwnerNode(); + output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx())); + output_nodes_name.push_back(out_node->GetName()); + } + } + return SUCCESS; +} + +/// +/// @ingroup domi_common +/// @brief Initialize omgcontext based on command line input +/// @param [in] input_shape Input shape string to be parsed +/// @return SUCCESS: parse successfully; PARAM_INVALID:parse failed +/// +Status InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, + bool is_dynamic_input) { + // Clear omgcontext data first + domi::GetContext().input_dims.clear(); + domi::GetContext().user_input_dims.clear(); + domi::GetContext().is_dynamic_input = is_dynamic_input; + + // the default value is ND + domi::GetContext().format = DOMI_TENSOR_ND; + if (!input_format.empty()) { + auto iter = ge::input_format_str_to_geformat.find(input_format); + if (iter != ge::input_format_str_to_geformat.end()) { + 
domi::GetContext().format = iter->second; + } else { + GELOGE(PARAM_INVALID, "Input format %s not support , expect ND/NCHW/NHWC/CHWN/NC1HWC0/NHWC1C0.", + input_format.c_str()); + return PARAM_INVALID; + } + } + + // Input is empty, do not process + if (input_shape.empty()) { + return SUCCESS; + } + + // Analyze the input shape paramete + unordered_map> &shape_map = domi::GetContext().input_dims; + + if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims, + is_dynamic_input) || + shape_map.empty()) { + GELOGE(PARAM_INVALID, "Failed to parse input shape: %s", input_shape.c_str()); + return PARAM_INVALID; + } + return SUCCESS; +} + +Status ParseOutNodes(const string &out_nodes) { + try { + // parse output node + if (!out_nodes.empty()) { + domi::GetContext().out_nodes_map.clear(); + domi::GetContext().user_out_nodes.clear(); + + vector nodes_v = StringUtils::Split(out_nodes, ';'); + for (const string &node : nodes_v) { + vector key_value_v = StringUtils::Split(node, ':'); + if (key_value_v.size() != 2) { // The size must be 2. 
+ ErrorManager::GetInstance().ATCReportErrMessage("E10069", {"param", "value", "supports"}, + {"out_nodes", node, "opname:index"}); + GELOGE(PARAM_INVALID, + "The input format of --out_nodes is invalid, the correct format is " + "\"node_name1:0;node_name1:1;node_name2:0\", while the actual input is %s.", + node.c_str()); + return PARAM_INVALID; + } + auto iter = domi::GetContext().out_nodes_map.find(key_value_v[0]); + // stoi: The method may throw an exception: invalid_argument/out_of_range + int32_t index = stoi(StringUtils::Trim(key_value_v[1])); + if (iter != domi::GetContext().out_nodes_map.end()) { + iter->second.emplace_back(index); + } else { + std::vector index_v; + index_v.emplace_back(index); + domi::GetContext().out_nodes_map.emplace(key_value_v[0], index_v); + } + domi::GetContext().user_out_nodes.push_back(std::make_pair(key_value_v[0], index)); + } + } + } catch (std::invalid_argument &) { + GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str()); + return PARAM_INVALID; + } catch (std::out_of_range &) { + GELOGE(PARAM_INVALID, "Invalid of out_nodes: %s ", out_nodes.c_str()); + return PARAM_INVALID; + } + + return SUCCESS; +} + +/// @ingroup domi_common +/// @brief Judge whether the op_Name_Map parameter matches the network +/// @param [in] graph Input network graph +/// @return SUCCESS: Input parameters are correct; PARAM_INVALID: Input parameters are incorrect +/// +static Status CheckOpNameMap(const ComputeGraphPtr &graph) { + GE_CHECK_NOTNULL(graph); + unordered_map graphNodeTypes; + for (const NodePtr &node : graph->GetAllNodes()) { + auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + GELOGE(PARAM_INVALID, "Invalid parameter for opDesc."); + return PARAM_INVALID; + } + graphNodeTypes[op_desc->GetType()] = ""; + } + std::map &propertiesMap = domi::GetContext().op_conf_map; + GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(propertiesMap.empty(), "op_name_map file is empty, please check file!"); + for (auto iter = 
propertiesMap.begin(); iter != propertiesMap.end(); iter++) { + GE_IF_BOOL_EXEC(graphNodeTypes.find(iter->second) == graphNodeTypes.end(), + ErrorManager::GetInstance().ATCReportErrMessage("E10060", {"parameter"}, {"op_name_map"}); + GELOGE(PARAM_INVALID, "Invalid parameter for op_name_map."); return PARAM_INVALID;); + } + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map &atc_params, + const char *model_file, const char *weights_file, domi::FrameworkType type, + const char *op_conf, const char *target, RunMode run_mode, + bool is_dynamic_input) { + GE_CHECK_NOTNULL(model_file); + GE_CHECK_NOTNULL(weights_file); + domi::GetContext().type = type; + domi::GetContext().run_mode = run_mode; + // Prevent data residue in multiple calls + PreChecker::Instance().Clear(); + + Params::Instance()->SetTarget(target); + + // Create an empty computegraph + ComputeGraphPtr compute_graph = nullptr; + GE_MAKE_SHARED(compute_graph = std::make_shared(kGraphDefaultName + "_" + CurrentTimeInStr()), + return FAILED); + graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); + + // initialize omgContext + std::string input_shape; + ParseAtcParms(atc_params, "input_shape", input_shape); + std::string input_format; + ParseAtcParms(atc_params, "input_format", input_format); + GE_RETURN_WITH_LOG_IF_ERROR(InitDomiOmgContext(input_shape, input_format, "", is_dynamic_input), + "ATC Generate call InitDomiOmgContext ret fail"); + + std::string is_output_adjust_hw_layout; + ParseAtcParms(atc_params, "is_output_adjust_hw_layout", is_output_adjust_hw_layout); + GE_RETURN_WITH_LOG_IF_ERROR(ParseOutputFp16NodesFormat(is_output_adjust_hw_layout), "Parse is_output_fp16 failed"); + + std::string out_nodes; + ParseAtcParms(atc_params, "out_nodes", out_nodes); + GE_RETURN_WITH_LOG_IF_ERROR(ParseOutNodes(out_nodes), "ATC Generate parse out nodes fail"); + + std::string output_type; + ParseAtcParms(atc_params, "output_type", output_type); + + // parse 
configuration item + if (op_conf != nullptr && *op_conf != '\0') { + // divided by ":" + PropertiesManager::Instance().SetPropertyDelimiter(OP_CONF_DELIMITER); + // Parsing the op_conf configuration item file + GE_IF_BOOL_EXEC(!PropertiesManager::Instance().Init(op_conf), + ErrorManager::GetInstance().ATCReportErrMessage("E10060", {"parameter"}, {"op_name_map"}); + GELOGE(FAILED, "op_name_map init failed!"); return FAILED); + // Return map and put it into ATC global variable + domi::GetContext().op_conf_map = PropertiesManager::Instance().GetPropertyMap(); + } + + // parse network model + auto model_parser = ModelParserFactory::Instance()->CreateModelParser(type); + GE_CHK_BOOL_RET_STATUS(model_parser != nullptr, FAILED, "ATC create model parser ret fail, type:%d.", type); + + UpdateParserCtxWithOmgCtx(); + Status ret = model_parser->Parse(model_file, graph); + UpdateOmgCtxWithParserCtx(); + + // Generate the report in case of pre inspection failure or only pre inspection mode + if (PreChecker::Instance().HasError() || run_mode == ONLY_PRE_CHECK) { + std::string check_report; + ParseAtcParms(atc_params, "check_report", check_report); + GE_RETURN_WITH_LOG_IF_ERROR(PreChecker::Instance().Save(check_report), "Generate pre-checking report failed."); + GELOGI("The pre-checking report has been saved to %s.", check_report.c_str()); + } + + // Prevent data residue in multiple calls + PreChecker::Instance().Clear(); + GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC model parse ret fail."); + + std::string input_fp16_nodes; + ParseAtcParms(atc_params, "input_fp16_nodes", input_fp16_nodes); + std::string is_input_adjust_hw_layout; + ParseAtcParms(atc_params, "is_input_adjust_hw_layout", is_input_adjust_hw_layout); + compute_graph = GraphUtils::GetComputeGraph(graph); + GE_RETURN_IF_ERROR(CheckInputFp16Nodes(compute_graph, input_fp16_nodes, is_input_adjust_hw_layout)); + + GE_RETURN_IF_ERROR(CheckInputShapeNode(compute_graph)); + + std::string compress_weight_conf; + 
ParseAtcParms(atc_params, "compress_weight_conf", compress_weight_conf); + GE_RETURN_IF_ERROR(SetWeightCompressNodes(compute_graph, compress_weight_conf)); + + // Verify the contents of the op_name_map + if (op_conf != nullptr && *op_conf != '\0') { + GE_RETURN_WITH_LOG_IF_ERROR(CheckOpNameMap(compute_graph), "op_name_map parameter is not fit with input net!"); + } + + // Print parse network structure + compute_graph->Dump(); + + // parse weight + graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); + auto weights_parser = WeightsParserFactory::Instance()->CreateWeightsParser(type); + ret = weights_parser->Parse(weights_file, graph); + GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail."); + + // IN ONLY_PRE_CHECK mode, generate pre inspection report only. + if (run_mode == ONLY_PRE_CHECK) { + return SUCCESS; + } + + GELOGI("ATC parser success."); + + return SUCCESS; +} + +void GetGroupName(ge::proto::ModelDef &model_def) { + auto modelAttrMap = model_def.mutable_attr(); + auto fusionModelOpListIter = modelAttrMap->find(MODEL_ATTR_FUSION_MODEL_DEF); + GE_IF_BOOL_EXEC( + fusionModelOpListIter != modelAttrMap->end(), int fusionOpIndex = 0; + for (int i = 0; i < model_def.graph_size(); i++) { + auto graph = model_def.mutable_graph(i); + for (int j = 0; j < graph->op_size(); j++) { + int64_t scope_id = 0; + auto bt = fusionModelOpListIter->second.list().bt(fusionOpIndex++); + ge::proto::OpDef fusion_op_def; + GE_CHK_BOOL_EXEC(bt.size() != 0, GELOGW("Invalid bt size"); return;); + + (void)(fusion_op_def.ParseFromArray(bt.data(), bt.size())); + auto fusion_attr_map = fusion_op_def.mutable_attr(); + auto fusion_iter = fusion_attr_map->find(kScopeIdAttr); + GE_IF_BOOL_EXEC(fusion_iter == fusion_attr_map->end(), continue;); + + scope_id = fusion_iter->second.i(); + ge::proto::OpDef *opdef = graph->mutable_op(j); + auto attr_map = opdef->mutable_attr(); + + int64_t stream_id = opdef->stream_id(); + + uint16_t l1_id = (((uint64_t)scope_id & 
0xFFFF0000)) >> 16; + GE_IF_BOOL_EXEC(l1_id != 0, ostringstream groupName; groupName << "group_op_l1_" << l1_id << "_" << stream_id; + (*attr_map)["group_op_name"].set_s(groupName.str()); continue;); + + uint16_t ub_id = ((uint64_t)scope_id & 0xFFFF); + GE_IF_BOOL_EXEC(ub_id != 0, ostringstream groupName; groupName << "group_op_ub_" << ub_id << "_" << stream_id; + (*attr_map)["group_op_name"].set_s(groupName.str());); + } + }); +} + +FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, const char *json_file) { + GE_CHECK_NOTNULL(model_file); + GE_CHECK_NOTNULL(json_file); + ge::ModelData model; + + // Mode 2 does not need to verify the priority, and a default value of 0 is passed + int32_t priority = 0; + + // Load model from file + Status ret = ModelParserBase::LoadFromFile(model_file, "", priority, model); + + if (ret != SUCCESS) { + GELOGE(ret, "LoadFromFile failed."); + return ret; + } + + uint8_t *model_data = nullptr; + uint32_t model_len = 0; + + // Parse the contents of the file to get the modeldef object + ret = ModelParserBase::ParseModelContent(model, model_data, model_len); + + if (ret == SUCCESS) { + OmFileLoadHelper omFileLoadHelper; + ge::graphStatus status = omFileLoadHelper.Init(model_data, model_len); + if (status != ge::GRAPH_SUCCESS) { + GELOGE(ge::FAILED, "Om file init failed."); + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; + } + return status; + } + + ModelPartition ir_part; + status = omFileLoadHelper.GetModelPartition(MODEL_DEF, ir_part); + if (status != ge::GRAPH_SUCCESS) { + GELOGE(ge::FAILED, "Get model part failed."); + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; + } + return status; + } + + ge::proto::ModelDef model_def; + + // De serialization + bool flag = ReadProtoFromArray(ir_part.data, ir_part.size, &model_def); + + if (flag) { + GetGroupName(model_def); + + json j; + 
Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); + + ret = ModelSaver::SaveJsonToFile(json_file, j); + } else { + ret = INTERNAL_ERROR; + GELOGE(ret, "ReadProtoFromArray failed."); + } + } else { + GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); + } + + if (model.model_data != nullptr) { + delete[](char *) model.model_data; + model.model_data = nullptr; + } + + return ret; +} + +FMK_FUNC_HOST_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char *json_file) { + ge::ModelData model; + + // Mode 2 does not need to verify the priority, and a default value of 0 is passed + int32_t priority = 0; + + // Load model from file + Status ret = ModelParserBase::LoadFromFile(model_file, "", priority, model); + auto free_model_data = [](void **ptr) -> void { + if (ptr != nullptr && *ptr != nullptr) { + delete[] reinterpret_cast(*ptr); + *ptr = nullptr; + } + }; + if (ret != SUCCESS) { + free_model_data(&model.model_data); + GELOGE(ret, "LoadFromFile failed."); + return ret; + } + + ge::proto::ModelDef model_def; + bool flag = google::protobuf::TextFormat::ParseFromString(reinterpret_cast(model.model_data), &model_def); + if (!flag) { + free_model_data(&model.model_data); + GELOGE(FAILED, "ParseFromString fail."); + return FAILED; + } + + GetGroupName(model_def); + json j; + Pb2Json::Message2Json(model_def, kOmBlackFields, j, true); + ret = ModelSaver::SaveJsonToFile(json_file, j); + if (ret != SUCCESS) { + free_model_data(&model.model_data); + GELOGE(ret, "Save json to file fail."); + return ret; + } + + free_model_data(&model.model_data); + + return SUCCESS; +} + +FMK_FUNC_HOST_VISIBILITY Status ConvertFwkModelToJson(const domi::FrameworkType framework, const char *model_file, + const char *json_file) { + if (framework == domi::CAFFE || framework == domi::TENSORFLOW) { + auto model_parser = ModelParserFactory::Instance()->CreateModelParser(framework); + GE_CHK_BOOL_RET_STATUS(model_parser != 
nullptr, FAILED, "ATC create model parser ret fail, framework:%d.", + framework); + return model_parser->ToJson(model_file, json_file); + } + + ErrorManager::GetInstance().ATCReportErrMessage("E10045", {"parameter"}, {"model"}); + GELOGE(PARAM_INVALID, "Input parameter[--framework] is mandatory and it's value must be: 0(Caffe) 3(TensorFlow)."); + return PARAM_INVALID; +} + +FMK_FUNC_HOST_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file) { + // Create buffer + GELOGI("Enter to dump infershape json schedule."); + ge::Model model("", ""); + model.SetGraph(graph); + Buffer buffer; + model.Save(buffer, true); + + ge::proto::ModelDef ge_proto; + if (buffer.GetData() != nullptr) { + std::string str(reinterpret_cast(buffer.GetData()), buffer.GetSize()); + if (!ge_proto.ParseFromString(str)) { + GELOGE(GRAPH_FAILED, "parse from string failed."); + return FAILED; + } + + nlohmann::json j; + Pb2Json::Message2Json(ge_proto, std::set(), j); + + ModelSaver::SaveJsonToFile(json_file, j); + } + return SUCCESS; +} + +void UpdateOmgCtxWithParserCtx() { + domi::GetContext().format = GetParserContext().format; + domi::GetContext().input_dims = GetParserContext().input_dims; + return; +} + +void UpdateParserCtxWithOmgCtx() { + GetParserContext().format = domi::GetContext().format; + GetParserContext().input_dims = domi::GetContext().input_dims; + GetParserContext().run_mode = domi::GetContext().run_mode; + return; +} +} // namespace ge diff --git a/src/ge/session/session_manager.cc b/src/ge/session/session_manager.cc index bfdd9f2d..c3439b0b 100644 --- a/src/ge/session/session_manager.cc +++ b/src/ge/session/session_manager.cc @@ -246,6 +246,24 @@ Status SessionManager::RegisterCallBackFunc( return innerSession->RegisterCallBackFunc(key, callback); } +Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector &inputs) { + if (!init_flag_) { + GELOGE(GE_SESSION_MANAGER_NOT_INIT); + return GE_SESSION_MANAGER_NOT_INIT; + 
} + SessionPtr innerSession = nullptr; + { + std::lock_guard lock(mutex_); + std::map::iterator it = session_manager_map_.find(session_id); + if (it == session_manager_map_.end()) { + return GE_SESSION_NOT_EXIST; + } else { + innerSession = it->second; + } + } + return innerSession->BuildGraph(graph_id, inputs); +} + Status SessionManager::RunGraphAsync(SessionId session_id, uint32_t graph_id, const std::vector &inputs, RunAsyncCallback callback) { if (!init_flag_) { diff --git a/src/ge/session/session_manager.h b/src/ge/session/session_manager.h index 111795ed..5cce5214 100644 --- a/src/ge/session/session_manager.h +++ b/src/ge/session/session_manager.h @@ -104,6 +104,16 @@ class SessionManager { /// Status GetVariable(SessionId session_id, const std::string &name, Tensor &val); + /// + /// @ingroup ge_session + /// @brief build a graph of the session with specific session id + /// @param [in] session_id session id + /// @param [in] graph_id graph id + /// @param [in] inputs input data + /// @return Status result of function + /// + Status BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector &inputs); + /// /// @ingroup ge_session /// @brief run a graph of the session with specific session id for train asynchronously diff --git a/src/ge/single_op/single_op_model.cc b/src/ge/single_op/single_op_model.cc index 16375741..9decdf75 100644 --- a/src/ge/single_op/single_op_model.cc +++ b/src/ge/single_op/single_op_model.cc @@ -94,7 +94,7 @@ Status SingleOpModel::InitModelMem(StreamResource &res) { return PARAM_INVALID; } - if (model_params_.memory_size > 0) { + if (model_params_.memory_size > model_params_.zero_copy_mem_size) { const string purpose("malloc feature map memory on model execute."); GELOGI("total memory: %lu, zero_copy_mem: %lu", model_params_.memory_size, model_params_.zero_copy_mem_size); model_params_.mem_base = res.MallocMemory(purpose, model_params_.memory_size - model_params_.zero_copy_mem_size); diff --git 
a/src/ge/single_op/task/aicpu_task_builder.cc b/src/ge/single_op/task/aicpu_task_builder.cc index e4b7aa80..1a4c37ca 100644 --- a/src/ge/single_op/task/aicpu_task_builder.cc +++ b/src/ge/single_op/task/aicpu_task_builder.cc @@ -48,7 +48,7 @@ Status AiCpuTaskBuilder::SetInputOutputAddr(void **io_addr, const std::vector sizeof(STR_FWK_OP_KERNEL)) { GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size()); return PARAM_INVALID; @@ -105,7 +105,7 @@ Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam return ret; } - STR_FWK_OP_KERNEL fwk_op_kernel; + STR_FWK_OP_KERNEL fwk_op_kernel = {0}; ret = SetFmkOpKernel(io_addr, ws_addr_vec[0], fwk_op_kernel); if (ret != SUCCESS) { (void)rtFree(io_addr); diff --git a/src/proto/task.proto b/src/proto/task.proto index 8ef5c2e2..50ea061b 100644 --- a/src/proto/task.proto +++ b/src/proto/task.proto @@ -75,6 +75,8 @@ message KernelDef { bytes flowtable = 15; string so_name = 16; string kernel_name = 17; + bytes kernel_ext_info = 18; + uint32 kernel_ext_info_size = 19; } message KernelContext { @@ -97,6 +99,8 @@ message KernelExDef { bytes args = 13; bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput uint32 task_info_size = 15; + bytes kernel_ext_info = 16; + uint32 kernel_ext_info_size = 17; } diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h index 8e996b9b..d61c981d 100644 --- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h +++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h @@ -27,9 +27,9 @@ extern "C" { The different framwork we adapted for. 
*/ typedef enum { - FMK_KERNEL_TYPE_T = 0, - FMK_KERNEL_TYPE_C = 10, - FMK_KERNEL_TYPE_P = 20, + FMK_KERNEL_TYPE_TF = 0, + FMK_KERNEL_TYPE_CF = 10, + FMK_KERNEL_TYPE_PT = 20, FMK_KERNEL_TYPE_RESERVED } FwkkernelType_t; diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index 09827358..35134faa 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -75,17 +75,25 @@ struct StrFWKKernel { uint64_t workspaceBaseAddr; // Workspace base addr, need convert to void* uint64_t inputOutputAddr; // InputOutput addr, need convert to void* - uint64_t extInfoNum; // extend info number - uint64_t extInfoAddr; // extend info addr list, ExtInfo structure, num equal to extInfoNum + uint64_t extInfoLen; // extend info total length + uint64_t extInfoAddr; // extend info addr, ExtInfo structure } __attribute__((packed)); typedef StrFWKKernel FWKOperateParam; +// Extent info ShapeAndType +const uint32_t kMaxShapeDims = 8; +struct ShapeAndType { + int32_t type; + int64_t dims[kMaxShapeDims]; +} __attribute__((packed)); + // Extend info structure for extInfoAddr -struct ExtInfo{ - int32_t infoType; - uint32_t infoLen; - uint64_t infoAddr; +const uint32_t kExtInfoHeadSize = 8; +struct ExtInfo { + int32_t infoType; // extend type + uint32_t infoLen; // length for infoMsg + char infoMsg[0]; // extend value } __attribute__((packed)); struct ResultSummary { diff --git a/third_party/fwkacllib/inc/ops/aipp.h b/third_party/fwkacllib/inc/ops/aipp.h index 6053bb76..9dc5a018 100644 --- a/third_party/fwkacllib/inc/ops/aipp.h +++ b/third_party/fwkacllib/inc/ops/aipp.h @@ -32,6 +32,8 @@ namespace ge { *@par Outputs: *features: The AIPP-processed output tensor of type float16 or uint8. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. 
*/ REG_OP(Aipp) .INPUT(images, TensorType{DT_UINT8}) diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index e1ea5537..0d2a05a3 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -46,6 +46,9 @@ Defaults to `int32`. *Supported *@par L2 convergence supported or not *@par Multiple batches supported or not + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LowerBound. */ REG_OP(LowerBound) @@ -77,6 +80,9 @@ reversal is performed. *@attention Constraints: \n *ReverseSequence runs on the Ascend AI CPU, which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ReverseSequence. */ REG_OP(ReverseSequence) @@ -108,6 +114,9 @@ keeps entire upper triangle. *@attention Constraints: \n *MatrixBandPart runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MatrixBandPart. */ REG_OP(MatrixBandPart) @@ -141,6 +150,9 @@ Defaults to "int32". *@attention Constraints: \n *UniqueWithCounts runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UniqueWithCounts. */ REG_OP(UniqueWithCounts) @@ -170,6 +182,9 @@ are 0D scalars. *@attention Constraints: \n *Unique runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Unique. */ REG_OP(Unique) @@ -201,6 +216,9 @@ Defaults to "int32". *@attention Constraints: \n *UniqueExt2 runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UniqueExt2. 
*/ REG_OP(UniqueExt2) @@ -224,6 +242,9 @@ REG_OP(UniqueExt2) *@attention Constraints: \n *InvertPermutation runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InvertPermutation. */ REG_OP(InvertPermutation) @@ -245,6 +266,9 @@ REG_OP(InvertPermutation) *@attention Constraints: \n *CheckNumerics runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator CheckNumerics. */ REG_OP(CheckNumerics) @@ -268,6 +292,9 @@ the flattened version of an array of dimensions "dims". *@attention Constraints: \n *UnravelIndex runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UnravelIndex. */ REG_OP(UnravelIndex) @@ -292,6 +319,9 @@ REG_OP(UnravelIndex) *@attention Constraints: \n *UpperBound runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UpperBound. */ REG_OP(UpperBound) @@ -322,6 +352,9 @@ Defaults to "int32". *@attention Constraints: \n *UniqueWithCountsExt2 runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UniqueWithCountsExt2. */ REG_OP(UniqueWithCountsExt2) @@ -354,6 +387,9 @@ do include the borders. *@attention Constraints: \n *MirrorPad runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MirrorPad. */ REG_OP(MirrorPad) @@ -384,6 +420,9 @@ REG_OP(MirrorPad) *@attention Constraints: \n *ListDiff runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ListDiff. 
*/ REG_OP(ListDiff) @@ -407,6 +446,8 @@ REG_OP(ListDiff) *@par Outputs: *y: The empty constant tensor. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator _ParallelConcatStart. */ REG_OP(_ParallelConcatStart) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -424,6 +465,9 @@ Operator Const has the same definition as operator Constant. *@par Outputs: *y: A constant tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Const. */ REG_OP(Const) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ @@ -439,6 +483,9 @@ REG_OP(Const) *@par Outputs: *y: The constant tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Const. */ REG_OP(Constant) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ @@ -454,6 +501,9 @@ REG_OP(Constant) *@par Outputs: *y: A tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Snapshot. */ REG_OP(Snapshot) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ @@ -470,6 +520,9 @@ REG_OP(Snapshot) *@par Outputs: *y: The input tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator GuaranteeConst. */ REG_OP(GuaranteeConst) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -487,6 +540,9 @@ REG_OP(GuaranteeConst) *@par Outputs: *y: A tensor. The broadcasted shape. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BroadcastArgs. */ REG_OP(BroadcastArgs) .INPUT(x1, TensorType({DT_INT32, DT_INT64})) @@ -505,6 +561,9 @@ REG_OP(BroadcastArgs) *@par Outputs: *y: The input tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator PreventGradient. 
*/ REG_OP(PreventGradient) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -525,6 +584,9 @@ REG_OP(PreventGradient) *@par Outputs: *@li y1: A tensor. Reduction indices of "x1". *@li y2: A tensor. Reduction indices of "x2". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BroadcastGradientArgs. */ REG_OP(BroadcastGradientArgs) .INPUT(x1, TensorType({DT_INT32, DT_INT64})) @@ -542,6 +604,9 @@ REG_OP(BroadcastGradientArgs) *@par Outputs: *y: The input tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator StopGradient. */ REG_OP(StopGradient) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -558,6 +623,9 @@ REG_OP(StopGradient) *@par Outputs: *y: A tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Identity. */ REG_OP(Identity) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -574,6 +642,9 @@ REG_OP(Identity) *@par Outputs: *y: A list of Tensor objects, with the same length as the input tensor list. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator IdentityN. */ REG_OP(IdentityN) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -591,6 +662,9 @@ REG_OP(IdentityN) *@par Outputs: *y: A tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ExpandDims. */ REG_OP(ExpandDims) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, @@ -600,6 +674,26 @@ REG_OP(ExpandDims) DT_INT64, DT_UINT32, DT_UINT64, DT_BOOL, DT_DOUBLE})) .OP_END_FACTORY_REG(ExpandDims) +/** +*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape is changed, without changing the data. + +*@par Inputs: +*@li x: Original tensor. +*@li axis: List of ints. 
+ +*@par Outputs: +*y: Reshape tensor with same data as input. + +*@par Third-party framework compatibility +*Compatible with the Onnx operator Unsqueeze. +*/ + +REG_OP(Unsqueeze) + .INPUT(x, TensorType({DT_FLOAT32, DT_INT32, DT_UINT8, DT_BOOL})) + .OUTPUT(y, TensorType({DT_FLOAT32, DT_INT32, DT_UINT8, DT_BOOL})) + .ATTR(axes, ListInt, {}) + .OP_END_FACTORY_REG(Unsqueeze) + /** *@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. @@ -616,6 +710,10 @@ REG_OP(ExpandDims) *@par Attention: *This operator cannot be directly called by the acllopExecute API. + +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator Reshape. +*@li Compatible with the Caffe operator Reshape. */ REG_OP(Reshape) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, @@ -638,6 +736,9 @@ REG_OP(Reshape) *@par Outputs: *y: A tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Squeeze. */ REG_OP(Squeeze) .INPUT(x, TensorType::ALL()) @@ -653,6 +754,9 @@ REG_OP(Squeeze) *@par Outputs: *y: A tensor. The rank of input tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Rank. */ REG_OP(Rank) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -671,6 +775,9 @@ REG_OP(Rank) *@par Outputs: *y: A tensor. The size of the input tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Size. */ REG_OP(Size) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -686,11 +793,15 @@ REG_OP(Size) *x: A tensor. *@par Attributes: -*index: Index of the input tensor of type int32 or int64. +*index: Index of the input tensor.The data type must be int32 or int64. \n +Assume that net has three data nodes, one should be set 0, another should \n +be set 1, and the left should be set 2. *@par Outputs: *y: A tensor. 
+*@par Third-party framework compatibility +*Compatible with the Caffe operator Data. */ REG_OP(Data) .INPUT(x, TensorType::ALL()) @@ -712,6 +823,9 @@ REG_OP(Data) *@par Outputs: *y: The created placeholder tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator PlaceHolder. */ REG_OP(PlaceHolder) .INPUT(x, TensorType::ALL()) @@ -735,6 +849,8 @@ REG_OP(PlaceHolder) *@par Outputs: *y: The created placeholder tensor. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator PlaceholderWithDefault. */ REG_OP(PlaceholderWithDefault) .INPUT(x, TensorType::ALL()) @@ -754,6 +870,8 @@ REG_OP(PlaceholderWithDefault) *@par Outputs: *y: A tensor. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ReadVariableOp. */ REG_OP(ReadVariableOp) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -785,6 +903,9 @@ REG_OP(Summary) *@par Outputs: *y: A tensor. The shape of the input tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Shape. */ REG_OP(Shape) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -804,6 +925,9 @@ REG_OP(Shape) *@par Outputs: *y: A list of tensors with the same length as the input list of tensors. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ShapeN. */ REG_OP(ShapeN) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -824,6 +948,9 @@ REG_OP(ShapeN) *@par Outputs: *y: A tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Empty. */ REG_OP(Empty) .INPUT(shape, TensorType({DT_INT32})) @@ -850,6 +977,9 @@ specifying the padding sizes. *@attention Constraints: \n *MirrorPadGrad runs on the Ascend AI CPU, which delivers poor performance.
\n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MirrorPadGrad. */ REG_OP(MirrorPadGrad) @@ -878,6 +1008,8 @@ DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL. *@attention Constraints:\n *Where runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Where. */ REG_OP(Where) @@ -887,6 +1019,7 @@ REG_OP(Where) .OP_END_FACTORY_REG(Where) /** +*@brief Derived from the Caffe operator Split that splits an input blob to * multiple output blobs for feeding a blob into multiple output layers. \n *The Split node is removed from the graph after the split operation is completed. @@ -896,12 +1029,16 @@ fp16, fp32, int8, uint8, int16, uint16, int32, uint32, int64, uint64. *@par Outputs: *y: A Tensor. Has the same type as "x".It's required and the value should equal to output_num. + +*@par Attributes: +*@li N: A required int. The parameter will get the number of dynamic outputs. */ REG_OP(Copy) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ + .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, \ DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) + .REQUIRED_ATTR(N, Int) .OP_END_FACTORY_REG(Copy); /** @@ -913,9 +1050,12 @@ REG_OP(Copy) `farmhash::fingerprint64`. *@par Outputs: +y: A two-dimensional `Tensor` of type `tf.uint8`. The first dimension equals to \n `data`'s first dimension, and the second dimension size depends on the \n fingerprint algorithm. +*@par Third-party framework compatibility +* Compatible with TensorFlow Fingerprint operator. 
*/ REG_OP(Fingerprint) diff --git a/third_party/fwkacllib/inc/ops/audio_ops.h b/third_party/fwkacllib/inc/ops/audio_ops.h index 6dff7712..6db181f9 100644 --- a/third_party/fwkacllib/inc/ops/audio_ops.h +++ b/third_party/fwkacllib/inc/ops/audio_ops.h @@ -42,6 +42,10 @@ per time slice. *@attention Constraints: \n *Mfcc runs on the Ascend AI CPU, which delivers poor performance. \n + + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Mfcc. */ REG_OP(Mfcc) @@ -72,6 +76,9 @@ REG_OP(Mfcc) *@attention Constraints: \n *AudioSpectrogram runs on the Ascend AI CPU, which delivers \n poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AudioSpectrogram. */ REG_OP(AudioSpectrogram) @@ -100,6 +107,9 @@ Length of audio requested. *@attention Constraints: \n *DecodeWav runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DecodeWav. */ REG_OP(DecodeWav) @@ -124,6 +134,8 @@ REG_OP(DecodeWav) *@attention Constraints:\n *EncodeWav runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with tensorflow Operator EncodeWav. */ REG_OP(EncodeWav) diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h index c4d995c4..47c5b06b 100644 --- a/third_party/fwkacllib/inc/ops/batch_ops.h +++ b/third_party/fwkacllib/inc/ops/batch_ops.h @@ -51,6 +51,9 @@ the same types as "x_tensors". *@attention Constraints: \n *Batch runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Batch. */ REG_OP(Batch) @@ -92,6 +95,9 @@ across multiple sessions. *@attention Constraints: \n *Unbatch runs on the Ascend AI CPU, which delivers poor performance. 
\n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Unbatch. */ REG_OP(Unbatch) @@ -128,6 +134,9 @@ across multiple sessions. *@attention Constraints: \n *UnbatchGrad runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UnbatchGrad. */ REG_OP(UnbatchGrad) diff --git a/third_party/fwkacllib/inc/ops/bitwise_ops.h b/third_party/fwkacllib/inc/ops/bitwise_ops.h index 53d5c005..ccbeb04c 100644 --- a/third_party/fwkacllib/inc/ops/bitwise_ops.h +++ b/third_party/fwkacllib/inc/ops/bitwise_ops.h @@ -36,6 +36,9 @@ int64, uint8, uint16, uint32, uint64. \n *@attention Constraints: \n *Unique runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator RightShift. */ REG_OP(RightShift) diff --git a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h index edc57e0c..37345833 100644 --- a/third_party/fwkacllib/inc/ops/boosted_trees_ops.h +++ b/third_party/fwkacllib/inc/ops/boosted_trees_ops.h @@ -41,6 +41,9 @@ a single feature. *@attention Constraints: \n *BoostedTreesBucketize runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BoostedTreesBucketize. */ REG_OP(BoostedTreesBucketize) diff --git a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h index c2b5a3f8..50178a59 100644 --- a/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h +++ b/third_party/fwkacllib/inc/ops/candidate_sampling_ops.h @@ -57,6 +57,9 @@ in a batch of sampled candidates. *@attention Constraints: \n *ThreadUnsafeUnigramCandidateSampler runs on the Ascend AI CPU, \n which delivers poor performance. 
+ +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ThreadUnsafeUnigramCandidateSampler. */ REG_OP(ThreadUnsafeUnigramCandidateSampler) @@ -108,6 +111,9 @@ sampled candidate representing the number of times. *@attention Constraints: \n *UniformCandidateSampler runs on the Ascend AI CPU, \n which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator UniformCandidateSampler. */ REG_OP(UniformCandidateSampler) @@ -171,6 +177,9 @@ If "unique" is true, then this is a probability. *@attention Constraints: \n * FixedUnigramCandidateSampler runs on the Ascend AI CPU, \n which delivers poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator FixedUnigramCandidateSampler. */ REG_OP(FixedUnigramCandidateSampler) @@ -227,6 +236,9 @@ to occur in a batch of sampled candidates. \n *@attention Constraints: \n *LearnedUnigramCandidateSampler runs on the Ascend AI CPU, which delivers \n poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LearnedUnigramCandidateSampler. */ REG_OP(LearnedUnigramCandidateSampler) @@ -276,6 +288,9 @@ to occur in a batch of sampled candidates. \n *@attention Constraints: \n *LogUniformCandidateSampler runs on the Ascend AI CPU, which delivers \n poor performance. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LogUniformCandidateSampler. */ REG_OP(LogUniformCandidateSampler) @@ -321,6 +336,9 @@ to occur in a batch of sampled candidates. If "unique" is true, then this is a p *@attention Constraints: \n *AllCandidateSampler runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AllCandidateSampler. */ REG_OP(AllCandidateSampler) @@ -358,6 +376,9 @@ each element is -FLOAT_MAX. 
*@attention Constraints: \n *ComputeAccidentalHits runs on the Ascend AI CPU, which delivers poor performance. \n + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ComputeAccidentalHits. */ REG_OP(ComputeAccidentalHits) diff --git a/third_party/fwkacllib/inc/ops/control_flow_ops.h b/third_party/fwkacllib/inc/ops/control_flow_ops.h index 5eebb9e3..77980b67 100644 --- a/third_party/fwkacllib/inc/ops/control_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/control_flow_ops.h @@ -41,6 +41,8 @@ namespace ge { *@see Switch() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator Merge. */ REG_OP(Merge) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -71,6 +73,8 @@ REG_OP(Merge) *@see Switch() | Merge() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator RefMerge. */ REG_OP(RefMerge) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -101,6 +105,8 @@ REG_OP(RefMerge) *@see Merge() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator Switch. */ REG_OP(Switch) .INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -134,6 +140,8 @@ REG_OP(Switch) *@see Merge() | Switch() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator RefSwitch. */ REG_OP(RefSwitch) .INPUT(data, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -181,6 +189,8 @@ REG_OP(SwitchN) *@see Exit() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator Enter. */ REG_OP(Enter) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -215,6 +225,8 @@ REG_OP(Enter) *@see Exit() | Enter() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator RefEnter.
*/ REG_OP(RefEnter) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -239,6 +251,8 @@ REG_OP(RefEnter) *@see Switch() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator LoopCond. */ REG_OP(LoopCond) .INPUT(x, TensorType({DT_BOOL})) @@ -256,6 +270,8 @@ REG_OP(LoopCond) *@par Outputs: *y: A Tensor. Has the same type as "x". + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator NextIteration. */ REG_OP(NextIteration) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -277,6 +293,8 @@ REG_OP(NextIteration) *@par Outputs: *y: A tensor. Has the same type as "x". + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator RefNextIteration. */ REG_OP(RefNextIteration) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -300,6 +318,8 @@ REG_OP(RefNextIteration) *@see Enter() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator Exit. */ REG_OP(Exit) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -323,6 +343,8 @@ REG_OP(Exit) *@see Enter() | Exit() + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator RefExit. */ REG_OP(RefExit) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -338,6 +360,8 @@ REG_OP(RefExit) * It is similar to a no-op that always produces a live control output \n * even when some control inputs are dead. + *@par Third-party framework compatibility + *Compatible with the TensorFlow operator ControlTrigger. */ REG_OP(ControlTrigger) .OP_END_FACTORY_REG(ControlTrigger) diff --git a/third_party/fwkacllib/inc/ops/ctc_ops.h b/third_party/fwkacllib/inc/ops/ctc_ops.h index 893c3166..00485a14 100644 --- a/third_party/fwkacllib/inc/ops/ctc_ops.h +++ b/third_party/fwkacllib/inc/ops/ctc_ops.h @@ -47,6 +47,8 @@ repeated non-blank labels will not be merged and are interpreted as \n individual labels. This is a simplified version of CTC.
\n If not specified, defaults to true +*@par Third-party framework compatibility +* Compatible with TensorFlow CTCLoss operator. */ REG_OP(CTCLoss) diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h index dac7fb0b..c766167a 100644 --- a/third_party/fwkacllib/inc/ops/data_flow_ops.h +++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h @@ -34,6 +34,8 @@ the queue is open. *@par Outputs: *is_closed:A Tensor of type bool. +*@par Third-party framework compatibility +*Compatible with tensorflow QueueIsClosed operator. */ REG_OP(QueueIsClosed) @@ -51,6 +53,8 @@ REG_OP(QueueIsClosed) *@par Outputs: *size:A Tensor of type int32. +*@par Third-party framework compatibility +*Compatible with tensorflow QueueSize operator. */ REG_OP(QueueSize) @@ -76,6 +80,8 @@ the given name across multiple sessions. *@par Outputs: *handle:A Tensor of type mutable resource. The handle to a queue. +*@par Third-party framework compatibility +*Compatible with tensorflow FIFOQueue operator. */ REG_OP(FIFOQueue) @@ -101,6 +107,8 @@ the enqueued tensors should be taken. operation will block for up to timeout_ms milliseconds. Note: This option \n is not supported yet. +*@par Third-party framework compatibility +*Compatible with tensorflow QueueEnqueue operator. */ REG_OP(QueueEnqueue) @@ -127,6 +135,8 @@ the enqueued tensors should be taken. operation will block for up to timeout_ms milliseconds. Note: This option \n is not supported yet. +*@par Third-party framework compatibility +*Compatible with tensorflow QueueEnqueueMany operator. */ REG_OP(QueueEnqueueMany) @@ -156,6 +166,8 @@ component in a tuple. *@par Outputs: *components:A list of Tensor objects of type component_types. +*@par Third-party framework compatibility +*Compatible with tensorflow QueueDequeue operator. */ REG_OP(QueueDequeue) @@ -187,6 +199,8 @@ component in a tuple. *@par Outputs: *components:A list of Tensor objects of type component_types. 
+*@par Third-party framework compatibility +*Compatible with tensorflow QueueDequeueMany operator. */ REG_OP(QueueDequeueMany) @@ -219,6 +233,8 @@ component in a tuple. *@par Outputs: *components:A list of Tensor objects of type component_types. +*@par Third-party framework compatibility +*Compatible with tensorflow QueueDequeueUpTo operator. */ REG_OP(QueueDequeueUpTo) @@ -255,6 +271,8 @@ match this name to the matching Unstage Op. *@see Unstage +*@par Third-party framework compatibility +*Compatible with tensorflow Stage operator. */ REG_OP(Stage) @@ -279,6 +297,8 @@ REG_OP(Stage) *@see Stage +*@par Third-party framework compatibility +*Compatible with tensorflow StageClear operator. */ REG_OP(StageClear) @@ -307,6 +327,8 @@ container does not contain sufficient elements this op will block until it does. *@par Outputs: *y:A list of Tensor objects of type dtypes. +*@par Third-party framework compatibility +*Compatible with tensorflow StagePeek operator. */ REG_OP(StagePeek) @@ -334,6 +356,8 @@ REG_OP(StagePeek) *@par Outputs: *size:A Tensor of type int32. +*@par Third-party framework compatibility +*Compatible with tensorflow StageSize operator. */ REG_OP(StageSize) @@ -358,6 +382,8 @@ REG_OP(StageSize) *@par Outputs: *element:A Tensor of type elem_type. +*@par Third-party framework compatibility +*Compatible with tensorflow StackPop operator. */ REG_OP(StackPop) @@ -383,6 +409,8 @@ to false. *@par Outputs: *y:A Tensor. Has the same type as elem. +*@par Third-party framework compatibility +*Compatible with tensorflow StackPush operator. */ REG_OP(StackPush) @@ -403,6 +431,8 @@ REG_OP(StackPush) *The input handle must be type resource. Inputs include: \n *handle: A Tensor of type resource. The handle to a stack. +*@par Third-party framework compatibility +*Compatible with tensorflow StackClose operator. */ REG_OP(StackClose) @@ -423,6 +453,8 @@ REG_OP(StackClose) *@par Outputs: *handle: A Tensor of type resource. The handle to a stack. 
+*@par Third-party framework compatibility +*Compatible with tensorflow Stack operator. */ REG_OP(Stack) @@ -452,6 +484,8 @@ DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING. *@attention Constraints:\n *DynamicPartition runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DynamicPartition. */ REG_OP(DynamicPartition) @@ -485,6 +519,8 @@ DT_QUINT8, DT_QINT8, DT_STRING, DT_COMPLEX64, DT_COMPLEX128. *@attention Constraints:\n *DynamicStitch runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DynamicStitch. */ REG_OP(DynamicStitch) @@ -520,6 +556,8 @@ DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT32. *@attention Constraints:\n *ParallelDynamicStitch runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ParallelDynamicStitch. */ REG_OP(ParallelDynamicStitch) @@ -541,12 +579,15 @@ REG_OP(ParallelDynamicStitch) *@par Attributes:An optional int that is >= 0. Defaults to "0". *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". *@attention Constraints:\n *MapClear runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapClear. */ REG_OP(MapClear) @@ -563,6 +604,7 @@ REG_OP(MapClear) *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". 
@@ -572,6 +614,8 @@ REG_OP(MapClear) *@attention Constraints:\n *MapIncompleteSize runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapIncompleteSize. */ REG_OP(MapIncompleteSize) @@ -596,6 +640,8 @@ REG_OP(MapIncompleteSize) *@par Outputs: *y: A list of Tensor objects of type dtypes. +*@par Third-party framework compatibility +*Compatible with tensorflow Unstage operator. */ REG_OP(Unstage) @@ -629,6 +675,7 @@ DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. Maximum number of elements in the Staging Area. If > 0, \n inserts on the container will block when the capacity is reached. *@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes. *@li container: An optional string. Defaults to "". \n If non-empty, this queue is placed in the given container. \n Otherwise, a default container is used. @@ -638,6 +685,8 @@ It is necessary to match this name to the matching Unstage Op. *@attention Constraints:\n *MapStage runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapStage. */ REG_OP(MapStage) @@ -680,6 +729,8 @@ DT_QINT16, DT_QUINT16, DT_QINT32. *@attention Constraints:\n *MapUnstage runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapUnstage. */ REG_OP(MapUnstage) @@ -722,6 +773,8 @@ DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. *@attention Constraints:\n *MapUnstageNoKey runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapUnstageNoKey. */ REG_OP(MapUnstageNoKey) @@ -750,6 +803,7 @@ REG_OP(MapUnstageNoKey) *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". 
*@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes that has length >= 1. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". @@ -763,6 +817,8 @@ DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT32. *@attention Constraints:\n *MapPeek runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapPeek. */ REG_OP(MapPeek) @@ -786,6 +842,7 @@ REG_OP(MapPeek) *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". @@ -795,6 +852,8 @@ REG_OP(MapPeek) *@attention Constraints:\n *MatMul runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator MapSize. */ REG_OP(MapSize) @@ -831,6 +890,8 @@ in the TensorArray will be expected to have have identical shapes. *@li handle: The handle to the TensorArray. *@li flow: A scalar used to control gradient flow. +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArray operator. */ REG_OP(TensorArray) @@ -853,6 +914,8 @@ REG_OP(TensorArray) *handle: A Tensor of type resource. The handle to a TensorArray \n (output of TensorArray or TensorArrayGrad). +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayClose operator. */ REG_OP(TensorArrayClose) @@ -878,6 +941,8 @@ the first axis. *@li lengths: A vector of the row sizes of the original T elements in the \n value output. +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayConcat operator. 
*/ REG_OP(TensorArrayConcat) @@ -912,6 +977,8 @@ specified, gathering zero-size TensorArrays is an error. *value: All of the elements in the TensorArray, concatenated along a new \n axis (the new dimension 0). +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayGather operator. */ REG_OP(TensorArrayGather) @@ -943,6 +1010,8 @@ TensorArray to return. *@li grad_handle: A Tensor of type resource. *@li flow_out: A Tensor of type float. +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayGrad operator. */ REG_OP(TensorArrayGrad) @@ -966,6 +1035,8 @@ REG_OP(TensorArrayGrad) *@par Outputs: *flow_out: A float scalar that enforces proper chaining of operations. +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayWrite operator. */ REG_OP(TensorArrayWrite) @@ -998,6 +1069,8 @@ TensorArray to return. *@li grad_handle: A Tensor of type resource. *@li flow_out: A Tensor of type float. +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayGradWithShape operator. */ REG_OP(TensorArrayGradWithShape) @@ -1024,6 +1097,8 @@ REG_OP(TensorArrayGradWithShape) *@par Outputs: *y: A Tensor of type dtype. +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayRead operator. */ REG_OP(TensorArrayRead) @@ -1050,6 +1125,8 @@ elements. *@par Outputs: *flow_out: A float scalar that enforces proper chaining of operations. +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArrayScatter operator. */ REG_OP(TensorArrayScatter) @@ -1076,6 +1153,8 @@ the TensorArray. *@par Outputs: *flow_out: A float scalar that enforces proper chaining of operations. +*@par Third-party framework compatibility +*Compatible with tensorflow TensorArraySplit operator. */ REG_OP(TensorArraySplit) @@ -1099,6 +1178,8 @@ REG_OP(TensorArraySplit) *@par Outputs: *size: The number of elements in a TensorArray.. 
+*@par Third-party framework compatibility +*Compatible with tensorflow TensorArraySize operator. */ REG_OP(TensorArraySize) @@ -1124,6 +1205,8 @@ be stored in this queue. *@par Outputs: *handle: A Tensor of type resource. The handle to a stack. +*@par Third-party framework compatibility +*Compatible with tensorflow RandomShuffleQueue operator. */ REG_OP(RandomShuffleQueue) @@ -1164,6 +1247,8 @@ will be shared under the given name across multiple sessions. *@attention Constraints:\n *PaddingFIFOQueue runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator PaddingFIFOQueue. */ REG_OP(PaddingFIFOQueue) @@ -1179,6 +1264,7 @@ REG_OP(PaddingFIFOQueue) *@brief A queue that produces elements sorted by the first component value. *@par Attributes: +*@li component_types: An optional list of tf.DTypes. Defaults to {}. \n The type of each component in a value. *@li shapes: A list of shapes for each component of a queue element. The length of this attr must be either 0 or the same as the length of \n @@ -1195,6 +1281,8 @@ queue will be shared under the given name across multiple sessions. *@attention Constraints:\n *PriorityQueue runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator PriorityQueue. */ REG_OP(PriorityQueue) @@ -1221,6 +1309,8 @@ the given queue will be canceled. *@attention Constraints:\n *QueueClose runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator QueueClose. */ REG_OP(QueueClose) @@ -1257,6 +1347,8 @@ It is necessary to match this name to the matching Unstage Op. *@attention Constraints:\n *OrderedMapStage runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapStage. 
*/ REG_OP(OrderedMapStage) @@ -1290,6 +1382,8 @@ REG_OP(OrderedMapStage) *@attention Constraints:\n *OrderedMapSize runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapSize. */ REG_OP(OrderedMapSize) @@ -1314,6 +1408,8 @@ REG_OP(OrderedMapSize) *@attention Constraints:\n *OrderedMapClear runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapClear. */ REG_OP(OrderedMapClear) @@ -1341,6 +1437,8 @@ REG_OP(OrderedMapClear) *OrderedMapIncompleteSize runs on the Ascend AI CPU, \n which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapIncompleteSize. */ REG_OP(OrderedMapIncompleteSize) @@ -1376,6 +1474,8 @@ DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT *@attention Constraints:\n *OrderedMapPeek runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapPeek. */ REG_OP(OrderedMapPeek) @@ -1418,6 +1518,8 @@ DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, DT_QINT16, DT_QUINT16, DT_QINT *OrderedMapUnstageNoKey runs on the Ascend AI CPU, \n which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapUnstageNoKey. */ REG_OP(OrderedMapUnstageNoKey) @@ -1446,6 +1548,7 @@ REG_OP(OrderedMapUnstageNoKey) *@par Attributes: *@li capacity: An optional int that is >= 0. Defaults to "0". *@li memory_limit: An optional int that is >= 0. Defaults to "0". +*@li dtypes: A list of tf.DTypes that has length >= 1. *@li container: An optional string. Defaults to "". *@li shared_name: An optional string. Defaults to "". @@ -1457,6 +1560,8 @@ DT_FLOAT16, DT_DOUBLE, DT_BOOL, DT_UINT32, DT_UINT64. 
*@attention Constraints:\n *OrderedMapUnstage runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator OrderedMapUnstage. */ REG_OP(OrderedMapUnstage) @@ -1496,6 +1601,8 @@ the given name across multiple sessions. *@attention Constraints:\n *Barrier runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Barrier. */ REG_OP(Barrier) @@ -1526,6 +1633,8 @@ DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE, DT_STRING. *@attention Constraints:\n *BarrierInsertMany runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierInsertMany. */ REG_OP(BarrierInsertMany) @@ -1572,6 +1681,8 @@ DT_RESOURCE, DT_STRING. *@attention Constraints:\n *BarrierTakeMany runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierTakeMany. */ REG_OP(BarrierTakeMany) @@ -1605,6 +1716,8 @@ even if no new key is introduced. *@attention Constraints:\n *BarrierClose runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierClose. */ REG_OP(BarrierClose) @@ -1625,6 +1738,8 @@ REG_OP(BarrierClose) *@attention Constraints:\n *BarrierReadySize runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierReadySize. */ REG_OP(BarrierReadySize) @@ -1645,6 +1760,8 @@ REG_OP(BarrierReadySize) *@attention Constraints:\n *BarrierIncompleteSize runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BarrierIncompleteSize. 
*/ REG_OP(BarrierIncompleteSize) @@ -1672,6 +1789,8 @@ compression for the file. Currently ZLIB and GZIP are supported. *@par Outputs: *records: A Tensor of type string. +*@par Third-party framework compatibility +*Compatible with tensorflow RecordInput operator. */ REG_OP(RecordInput) @@ -1703,6 +1822,8 @@ name across multiple sessions. *@attention Constraints:\n *ConditionalAccumulator runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ConditionalAccumulator. */ REG_OP(ConditionalAccumulator) @@ -1735,6 +1856,8 @@ DT_FLOAT16, DT_FLOAT, DT_DOUBLE *AccumulatorApplyGradient runs on the Ascend AI CPU, \n which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulatorApplyGradient. */ REG_OP(AccumulatorApplyGradient) @@ -1759,6 +1882,8 @@ in the given accumulator. *AccumulatorNumAccumulated runs on the Ascend AI CPU, \n which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulatorNumAccumulated. */ REG_OP(AccumulatorNumAccumulated) @@ -1777,6 +1902,8 @@ REG_OP(AccumulatorNumAccumulated) *@attention Constraints:\n *AccumulatorSetGlobalStep runs on the Ascend AI CPU, which delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulatorSetGlobalStep. */ REG_OP(AccumulatorSetGlobalStep) @@ -1806,6 +1933,8 @@ DT_FLOAT16, DT_FLOAT, DT_DOUBLE. *AccumulatorTakeGradient runs on the Ascend AI CPU, \nwhich delivers poor performance.\n +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulatorTakeGradient. */ REG_OP(AccumulatorTakeGradient) @@ -1831,6 +1960,8 @@ default is "MEAN". *@par Outputs: *handle: The handle to the accumulator. +*@par Third-party framework compatibility +*Compatible with tensorflow SparseConditionalAccumulator operator. 
*/ REG_OP(SparseConditionalAccumulator) @@ -1863,6 +1994,8 @@ unknown, in which case the input is ignored during validation. *@li dtype: The data type of accumulated gradients. Needs to correspond to \n the type of the accumulator. +*@par Third-party framework compatibility +*Compatible with tensorflow SparseAccumulatorApplyGradient operator. */ REG_OP(SparseAccumulatorApplyGradient) @@ -1895,6 +2028,8 @@ type of the accumulator. *@li values: Values of the average of the accumulated sparse gradients. *@li shape: Shape of the average of the accumulated sparse gradients. +*@par Third-party framework compatibility +*Compatible with tensorflow SparseAccumulatorTakeGradient operator. */ REG_OP(SparseAccumulatorTakeGradient) @@ -1925,6 +2060,8 @@ name across multiple sessions. *@attention Constraints: *ResourceConditionalAccumulator runs on the Ascend AI CPU, which delivers poor performance. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceConditionalAccumulator. */ REG_OP(ResourceConditionalAccumulator) @@ -1950,6 +2087,8 @@ DT_FLOAT16, DT_FLOAT, DT_DOUBLE *@attention Constraints: *ResourceAccumulatorApplyGradient runs on the Ascend AI CPU, which delivers poor performance. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceAccumulatorApplyGradient. */ REG_OP(ResourceAccumulatorApplyGradient) @@ -1970,6 +2109,8 @@ REG_OP(ResourceAccumulatorApplyGradient) *@attention Constraints: *ResourceAccumulatorNumAccumulated runs on the Ascend AI CPU, which delivers poor performance. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceAccumulatorNumAccumulated. */ REG_OP(ResourceAccumulatorNumAccumulated) @@ -1987,6 +2128,8 @@ REG_OP(ResourceAccumulatorNumAccumulated) *@attention Constraints: *ResourceAccumulatorSetGlobalStep runs on the Ascend AI CPU, which delivers poor performance. 
+*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceAccumulatorSetGlobalStep. */ REG_OP(ResourceAccumulatorSetGlobalStep) @@ -2013,6 +2156,8 @@ DT_FLOAT16, DT_FLOAT, DT_DOUBLE. *@attention Constraints: *ResourceAccumulatorTakeGradient runs on the Ascend AI CPU, which delivers poor performance. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ResourceAccumulatorTakeGradient. */ REG_OP(ResourceAccumulatorTakeGradient) @@ -2037,6 +2182,8 @@ bool, double, string. *@attention Constraints:\n *-The implementation for OutfeedEnqueueOp on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow OutfeedEnqueueOp operator. */ REG_OP(OutfeedEnqueueOp) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 11475819..097eccc5 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -30,6 +30,9 @@ namespace ge { *@par Outputs: *y: A Tensor. Has the same shape and type as the elements of "x". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AddN. */ REG_OP(AddN) .DYNAMIC_INPUT(x, TensorType::NumberType()) @@ -59,6 +62,9 @@ REG_OP(AddN) *@par Outputs: * @li y1: A mutable Tensor. Has the same type as "grads". * @li y2: A mutable Tensor. Has the same type as "grads". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaximumGrad. */ REG_OP(MaximumGrad) .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -92,6 +98,9 @@ REG_OP(MaximumGrad) *@par Outputs: * @li y1: A mutable Tensor. Has the same type as "grads". * @li y2: A mutable Tensor. Has the same type as "grads". 
+ +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MinimumGrad. */ REG_OP(MinimumGrad) .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -107,15 +116,15 @@ REG_OP(MinimumGrad) *@brief Cast a tensor form src data type to dst data type. *@par Inputs: -*One input:\n -*x:A `Tensor`. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8,\n +*One input: +*x:A Tensor. Must be one of the following types: bool, float16, float, int8, int32, uint32, uint8, int64, uint64, int16, uint16, double, complex64, complex128, qint8, quint8, qint16, quint16, qint32. *@par Attributes: *dst_type: An required attribute of type int32, specifying the dst data type. *@par Outputs: -*y:A `Tensor`. Has the same type as `x`. +*y:A Tensor. Has the same type as x. */ REG_OP(Cast) .INPUT(x, TensorType({DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT32, DT_UINT8, @@ -132,11 +141,15 @@ REG_OP(Cast) *@par Inputs: *Two inputs, including: -* @li x1: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +* @li x1: A Tensor. Must be one of the following types: float16, float32, +* double, int32, int8, uint8, int64, uint16, uint32, uint64. * @li x2: A Tensor of the same type as "x1". *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator GreaterEqual. */ REG_OP(GreaterEqual) .INPUT(x1, TensorType::RealNumberType()) @@ -155,6 +168,9 @@ REG_OP(GreaterEqual) *@par Outputs: *y: A Tensor of type bool. + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator Less. */ REG_OP(Less) .INPUT(x1, TensorType::RealNumberType()) @@ -174,6 +190,9 @@ REG_OP(Less) *@par Outputs: * y: A Tensor. Has the same type and format as input "x1". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator RealDiv. 
*/ REG_OP(RealDiv) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, @@ -195,6 +214,8 @@ REG_OP(RealDiv) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Sqrt. */ REG_OP(Sqrt) .INPUT(x, TensorType{(DT_FLOAT. DT_FLOAT16, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128)}) @@ -211,6 +232,9 @@ REG_OP(Sqrt) *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Maximum. */ REG_OP(Maximum) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, @@ -231,6 +255,9 @@ REG_OP(Maximum) *@par Outputs: *y: A Tensor of the same type as "x1". + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator Minimum. */ REG_OP(Minimum) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT32, @@ -251,6 +278,9 @@ REG_OP(Minimum) *@par Outputs: *y:A Tensor with same type as "x". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Reciprocal. */ REG_OP(Reciprocal) .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_FLOAT16, @@ -269,6 +299,8 @@ REG_OP(Reciprocal) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Subtract. */ REG_OP(Sub) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_UINT8, DT_INT8, @@ -291,6 +323,9 @@ REG_OP(Sub) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Abs. */ REG_OP(Abs) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) @@ -311,6 +346,9 @@ REG_OP(Abs) *@par Outputs: * z: A tensor. Has the same type as "y". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AbsGrad. 
+* */ REG_OP(AbsGrad) .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -322,12 +360,14 @@ REG_OP(AbsGrad) *@brief: Computes the sign of "x". *@par Inputs: -*One inputs, include: -*x:A Tensor of type float16, float32, int32, int64, double, +*x:An ND Tensor of type float16, float32, int32, int64, double, * complex64, complex128. *@par Outputs: -*y:A Tensor with same type as "x". +*y:An ND Tensor with same type as "x". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Sign. */ REG_OP(Sign) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT, DT_DOUBLE, DT_INT32, @@ -346,6 +386,9 @@ REG_OP(Sign) *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator SquaredDifference. */ REG_OP(SquaredDifference) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, @@ -359,11 +402,16 @@ REG_OP(SquaredDifference) /** *@brief Computes cosine of "x" element-wise. -*@par Inputs:\n -*x: A Tensor of type float16 or float32. +*@par Inputs: +*x: A Tensor of type float16, float32, double, complex64, complex128. +* the format can be [NCHW,NC1HWC0,NHWC,ND] + +*@par Outputs: +*y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cos. -*@par Outputs:\n -*y: A Tensor of type float16 or float32. */ REG_OP(Cos) .INPUT(x, TensorType::UnaryDataType()) @@ -375,11 +423,16 @@ REG_OP(Cos) *@par Inputs: * Two inputs, including: -*@li x1: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8, float64, int64, uint16, int16, complex64, complex128 -*@li x2: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8, float64, int64, uint16, int16, complex64, complex128 +*@li x1: A Tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8, float64, int64, uint16, int16, +* complex64, complex128, the format can be [NCHW,NC1HWC0,NHWC,ND]. 
+*@li x2: A Tensor. Has the same type and format as input "x1". *@par Outputs: * y: A Tensor. Has the same type and format as input "x1". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Div. */ REG_OP(Div) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32, @@ -398,12 +451,17 @@ REG_OP(Div) *@par Inputs: * Two inputs, including: -*@li x1: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8, \n -* double, int16, int64, complex64, complex128, quint8, qint8, qint32, string, bool. -*@li x2: A Tensor of the same type as "x1". +*@li x1: A Tensor. Must be one of the following types: +* float16, float32, int32, int8, uint8, double, int16, int64, complex64, +* complex128, quint8, qint8, qint32, string, bool. the format can be +* [NCHW, NC1HWC0, NHWC, ND] +*@li x2: A Tensor of the same type and format as "x1". *@par Outputs: *y: A Tensor of type bool. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Equal. */ REG_OP(Equal) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8, @@ -422,7 +480,7 @@ REG_OP(Equal) *@par Inputs: *One input:\n -*x: A Tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. +*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. *@par Attributes: *@li base: An optional attribute of type float32, specifying the base gamma. Defaults to "-1". @@ -431,6 +489,9 @@ REG_OP(Equal) *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Exp. */ REG_OP(Exp) .INPUT(x, TensorType::UnaryDataType()) @@ -444,11 +505,14 @@ REG_OP(Exp) *@brief Computes the exp(x) - 1 element-wise, y = e^x - 1. *@par Inputs: -*One input:\n +*One input: *x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. 
*@par Outputs: *y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Expm1. */ REG_OP(Expm1) .INPUT(x, TensorType::UnaryDataType()) @@ -463,6 +527,9 @@ REG_OP(Expm1) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Inv. */ REG_OP(Inv) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT32,DT_INT64,DT_COMPLEX64,DT_COMPLEX128})) @@ -477,6 +544,12 @@ REG_OP(Inv) * Two inputs, including: * @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8. * @li grad: A Tensor. Has the same type as "x". + +*@par Outputs: +*y: A Tensor, Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator InvGrad. */ REG_OP(InvGrad) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) @@ -489,11 +562,16 @@ REG_OP(InvGrad) *@par Inputs: * Two inputs, including: -*@li x1: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +*@li x1: A Tensor. Must be one of the following types: float32, float64, +* int32, uint8, int16, int8, int64, qint8, quint8, qint32, uint16, +* float16, uint32, uint64. *@li x2: A Tensor of the same type as "x1". *@par Outputs: *y: A Tensor of type bool. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator LessEqual. */ REG_OP(LessEqual) .INPUT(x1, TensorType::RealNumberType()) @@ -510,6 +588,9 @@ REG_OP(LessEqual) *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Log1p. */ REG_OP(Log1p) .INPUT(x, TensorType::UnaryDataType()) @@ -520,11 +601,14 @@ REG_OP(Log1p) *@brief Returns element-wise remainder of division. *@par Inputs: *Two inputs, including: -* @li x1: A Tensor. Must be one of the following types: float16, float32, int32, int64, int8, uint8, double. 
+* @li x1: A Tensor. Must be one of the following types: float16, float32, + * int32, int64, int8, uint8, double. * @li x2: A Tensor of the same type as "x1". *@par Outputs: *y: A Tensor. Has the same type as "x1". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Mod. */ REG_OP(Mod) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, @@ -540,11 +624,15 @@ REG_OP(Mod) *@par Inputs: * Two inputs, including: -*@li x1: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +*@li x1: A Tensor. Must be one of the following types: float16, float32, int32, + * int8, uint8, double, int16, int64, uint16, half, uint32, uint64 *@li x2: A Tensor of the same type as "x1". *@par Outputs: *y: A Tensor of type bool. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator NotEqual. */ REG_OP(NotEqual) .INPUT(x1, TensorType::RealNumberType()) @@ -557,10 +645,14 @@ REG_OP(NotEqual) *@par Inputs: * One input: -*x: A Tensor. Must be one of the following types: float16, float32, int32 +*x: A Tensor. Must be one of the following types: float16, float32, int32, + * int64, complex64, complex128. *@par Outputs: *y: A Tensor. Has the same type and format as input "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Neg. */ REG_OP(Neg) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) @@ -570,15 +662,22 @@ REG_OP(Neg) /** *@brief Returns x1/x2 element-wise for integer types. -*@par Inputs:\n -*@li x1: A Tensor of type float16, float32, int32, int8, or uint8. +*@par Inputs: +*@li x1: A Tensor. Must be one of the following types: +* float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. *@li x2: A Tensor of the same data type as "x1". 
-*@par Outputs:\n -*y: A Tensor of type float16, float32, int32, int8, or uint8. +*@par Outputs: +*y: A Tensor. Has the same type as "x1". -*@attention Constraints:\n +*@attention Constraints: * Broadcasting is supported. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TruncateDiv. + */ REG_OP(TruncateDiv) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT32, @@ -597,11 +696,15 @@ REG_OP(TruncateDiv) *@par Inputs: * Two inputs, including: -* @li x1: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. +* @li x1: A Tensor. Must be one of the following types: float16, float32, +* double, complex64, complex128. * @li x2: A Tensor. Has the same type as "x1". *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Xdivy. */ REG_OP(Xdivy) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, @@ -613,15 +716,20 @@ REG_OP(Xdivy) .OP_END_FACTORY_REG(Xdivy) /** -*@brief Computes "x" multiplied by the logarithm of y element-wise, if "x" == 0, return "0". +*@brief Computes "x" multiplied by the logarithm of y element-wise, +* if "x" == 0, return "0". *@par Inputs: * Two inputs, including: -* @li x1: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. +* @li x1: A Tensor. Must be one of the following types: float16, float32, +* double, complex64, complex128. * @li x2: A Tensor. Has the same type as "x1". *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Xlogy. */ REG_OP(Xlogy) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, @@ -641,6 +749,9 @@ REG_OP(Xlogy) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Square. 
*/ REG_OP(Square) .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT16, DT_FLOAT, @@ -655,10 +766,14 @@ REG_OP(Square) * *@par Inputs: -* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. +* x: An ND or 5HD tensor. Must be one of the following types: float, double, half, + * complex64, complex128. * *@par Outputs: -* y: A tensor. Has the same type as "x". +* y: An ND or 5HD tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Rsqrt. * */ REG_OP(Rsqrt) @@ -676,6 +791,9 @@ REG_OP(Rsqrt) *@par Outputs: * y: A tensor. Has the same type as "x". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Asin. +* */ REG_OP(Asin) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -689,7 +807,7 @@ REG_OP(Asin) * *@par Inputs: -*@li y: A tensor of type float16 or float32. +*@li y: A tensor of type float16, float32, float64, int32, int64, complex64, complex128. *@li dy: A tensor of the same type as "y". * *@attention Constraints: @@ -698,6 +816,9 @@ REG_OP(Asin) *@par Outputs: * z: A tensor. Has the same type as "y". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AsinGrad. +* */ REG_OP(AsinGrad) .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -718,6 +839,9 @@ REG_OP(AsinGrad) *@par Outputs: * y: A tensor. Has the same type as "x". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Acos. +* */ REG_OP(Acos) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, @@ -740,6 +864,9 @@ REG_OP(Acos) *@par Outputs: * z: A tensor. Has the same type as "y". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AcosGrad. +* */ REG_OP(AcosGrad) .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -761,6 +888,9 @@ REG_OP(AcosGrad) *@par Outputs: * y: A tensor. Has the same type as "x". 
* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Acosh. +* */ REG_OP(Acosh) .INPUT(x, TensorType::UnaryDataType()) @@ -781,6 +911,9 @@ REG_OP(Acosh) *@par Outputs: * z: A tensor. Has the same type as "y". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AcoshGrad. +* */ REG_OP(AcoshGrad) .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -794,13 +927,16 @@ REG_OP(AcoshGrad) * *@par Inputs: *@li x1: A tensor of type bool. -*@li x2 A tensor of the same type as "x1". +*@li x2: A tensor of the same type as "x1". * *@attention Constraints: * LogicalOr supports broadcasting. * *@par Outputs: -* z: A tensor of the same type as "x1". +* y: A tensor of the same type as "x1". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LogicalOr. * */ REG_OP(LogicalOr) @@ -815,13 +951,16 @@ REG_OP(LogicalOr) * *@par Inputs: *@li x1: A tensor of type bool. -*@li x2 A tensor of the same type as "x1". +*@li x2: A tensor of the same type as "x1". * *@attention Constraints: -* LogicalOr supports broadcasting. +* LogicalAnd supports broadcasting. * *@par Outputs: -* z: A tensor of the same type as "x1". +* y: A tensor of the same type as "x1". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator LogicalAnd. * */ REG_OP(LogicalAnd) @@ -831,9 +970,9 @@ REG_OP(LogicalAnd) .OP_END_FACTORY_REG(LogicalAnd) /** -*@brief Computes the Bessel i0e function of "x" element-wise.\n -* Exponentially scaled modified Bessel function of order 0 \n -* defined as: bessel_i0e(x) = exp(-abs(x)) bessel_i0(x).\n +*@brief Computes the Bessel i0e function of "x" element-wise. +* Exponentially scaled modified Bessel function of order 0 +* defined as: bessel_i0e(x) = exp(-abs(x)) bessel_i0(x). * This function is faster and numerically stabler than "bessel_i0(x)". * *@par Inputs: @@ -842,6 +981,9 @@ REG_OP(LogicalAnd) *@par Outputs: * y: A tensor. Has the same type as "x". 
* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BesselI0e. +* */ REG_OP(BesselI0e) .INPUT(x, TensorType::FloatingDataType()) @@ -849,9 +991,9 @@ REG_OP(BesselI0e) .OP_END_FACTORY_REG(BesselI0e) /** -*@brief Computes the Bessel i1e function of "x" element-wise.\n -* Exponentially scaled modified Bessel function of order 0 \n -* defined as: bessel_i1e(x) = exp(-abs(x)) bessel_i1(x).\n +*@brief Computes the Bessel i1e function of "x" element-wise. +* Exponentially scaled modified Bessel function of order 1 +* defined as: bessel_i1e(x) = exp(-abs(x)) bessel_i1(x). * This function is faster and numerically stabler than "bessel_i1(x)". * *@par Inputs: @@ -860,6 +1002,9 @@ REG_OP(BesselI0e) *@par Outputs: * y: A tensor. Has the same type as "x". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BesselI1e. +* */ REG_OP(BesselI1e) .INPUT(x, TensorType::FloatingDataType()) @@ -867,7 +1012,7 @@ REG_OP(BesselI1e) .OP_END_FACTORY_REG(BesselI1e) /** -* @brief Computes logarithm of x element-wise.\n +* @brief Computes logarithm of x element-wise. * y = log_base(shift + scale * x), with "base" > 0. * @par Inputs: @@ -888,6 +1033,10 @@ REG_OP(BesselI1e) * value "-1" sets "base" to "e". * @li If the input value of operator Log is within the range (0, 0.01] or \n * [0.95, 1.05], the output accuracy is subject to change. + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator Log. +* @li Compatible with the Caffe operator Log. */ REG_OP(Log) .INPUT(x, TensorType::UnaryDataType()) @@ -912,6 +1061,10 @@ REG_OP(Log) * uint8, int8, uint16, int16, int32, int64, complex64, complex128. * @attention Constraints: +* @li "x1" and "x2" have incompatible shapes or types. + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Multiply. 
*/ REG_OP(Mul) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_UINT8, DT_INT8, @@ -931,6 +1084,7 @@ REG_OP(Mul) * input gradient. * @par Inputs: +* Two inputs, including: * @li y: A Tensor of type float32 or float16. * @li dy: A Tensor. Has the same type as "y". @@ -962,6 +1116,8 @@ REG_OP(Multiply) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Add. */ REG_OP(Add) .INPUT(x1, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, @@ -986,6 +1142,9 @@ REG_OP(Add) *@par Outputs: *@li y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator LRN. */ REG_OP(FusedMulAdd) @@ -1001,7 +1160,7 @@ REG_OP(FusedMulAdd) * *@par Inputs: *@li x1: A tensor. Must be one of the following types: float16, float32, float64, uint8, int8, int16, int32, int64, complex64, complex128. -*@li x2 A tensor of the same type as "x1". +*@li x2: A tensor of the same type as "x1". * *@attention Constraints: * AddV2 supports broadcasting. @@ -1009,6 +1168,9 @@ REG_OP(FusedMulAdd) *@par Outputs: * y: A tensor. Has the same type as "x1". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AddV2. +* */ REG_OP(AddV2) .INPUT(x1, TensorType({DT_FLOAT, DT_INT32, DT_INT64, DT_FLOAT16, DT_INT16, @@ -1026,20 +1188,23 @@ REG_OP(AddV2) *@brief Updates "ref" by adding "value" to it. *@par Inputs: -*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int32, int64. +*@li ref: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. *@li value: A Tensor of the same type as "ref". *@par Attributes: -*use_locking: An optional bool. 
Defaults to "False".\n - If "True", the addition will be protected by a lock;\n - otherwise the behavior is undefined, but may exhibit less contention.\n +*use_locking: An optional bool. Defaults to "False". + If "True", the addition will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. * This attribute is reserved. *@par Outputs: *ref: A Tensor that holds the new value of ref after the value has been added. -*@attention Constraints:\n +*@attention Constraints: *An input tensor of type int64 must have a shape with size 1. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AssignAdd. */ REG_OP(AssignAdd) .INPUT(ref, TensorType::BasicType()) @@ -1056,17 +1221,20 @@ REG_OP(AssignAdd) *@li value: A Tensor of the same type as "ref". *@par Attributes: -*@li validate_shape: An optional bool. Defaults to "true".\n - If "true", the operation will validate that the shape of "value" matches the shape of the Tensor being assigned to.\n -* If "false", "ref" will take on the shape of "value".\n +*@li validate_shape: An optional bool. Defaults to "true". + If "true", the operation will validate that the shape of "value" matches the shape of the Tensor being assigned to. +* If "false", "ref" will take on the shape of "value". * This attribute is reserved. -*@li use_locking: An optional bool. Defaults to True.\n - If True, the assignment will be protected by a lock;\n - otherwise the behavior is undefined, but may exhibit less contention.\n +*@li use_locking: An optional bool. Defaults to True. + If True, the assignment will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. * This attribute is reserved. *@par Outputs: *ref: A Tensor that holds the new value of ref after the value has been assigned. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Assign. 
*/ REG_OP(Assign) .INPUT(ref, TensorType::BasicType()) @@ -1093,6 +1261,9 @@ REG_OP(Assign) *@par Outputs: * y: A tensor. Has the same type as "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AssignSub. +* */ REG_OP(AssignSub) .INPUT(var, TensorType::NumberType()) @@ -1106,13 +1277,17 @@ REG_OP(AssignSub) *@par Inputs: * Two inputs, including: -*@li y: A Tensor. Must be one of the following types: float16, float32, int32, int8. -*@li dy: A Tensor of the same type as "y". +*@li y: An NCHW, NC1HWC0, NHWC, ND Tensor. Must be one of the following types: \ + * float, int32, int8, double, complex64, complex128, half. +*@li dy: A Tensor of the same type and format as "y". *@par Outputs: -*z: A Tensor of the same type as "y". +*z: A Tensor of the same type and format as "y". *@see Matmul() | Rsqrt () + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator RsqrtGrad. */ REG_OP(RsqrtGrad) .INPUT(y, TensorType({UnaryDataType,int32,int8})) @@ -1123,11 +1298,17 @@ REG_OP(RsqrtGrad) /** *@brief Computes hyperbolic sine of "x" element-wise. -*@par Inputs:\n -*x: A Tensor of type float16 or float32. +*@par Inputs: +*x: An NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64, + * complex128, half. + +*@par Outputs: +*y: A NCHW, NC1HWC0, NHWC,or ND Tensor of type float, double, complex64, + * complex128, half. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Sinh. -*@par Outputs:\n -*y: A Tensor of type float16 or float32. */ REG_OP(Sinh) .INPUT(x, TensorType::UnaryDataType()) @@ -1139,12 +1320,16 @@ REG_OP(Sinh) *@par Inputs: * Three inputs, including: -*@li x: A Tensor of type float16, float32, or int32. +*@li x: A Tensor of type float32, float64, int32, uint8, int16, int8, complex64, int64, +*qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. *@li clip_value_min: A Tensor of the same type as "x". 
*@li clip_value_max: A Tensor of the same type as "x". *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ClipByValue. */ REG_OP(ClipByValue) .INPUT(x, TensorType::NumberType()) @@ -1157,10 +1342,15 @@ REG_OP(ClipByValue) *@brief Computes cosine of "x" element-wise. *@par Inputs: -*x: A Tensor of type float16 or float32. +*x: A Tensor of type float16, float32, double, complex64, complex128. +* the format can be [NCHW,NC1HWC0,NHWC,ND]. *@par Outputs: -*y: A Tensor of type float16 or float32. +*y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cosh. + */ REG_OP(Cosh) .INPUT(x, TensorType::UnaryDataType()) @@ -1172,19 +1362,23 @@ REG_OP(Cosh) *@par Inputs: * Two inputs, including: -*@li x1: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8, double, complex64, complex128. +*@li x1: A Tensor. Must be one of the following types:float16, float32, int32, +* int8, uint8, double, the format can be [NCHW,NC1HWC0,NHWC,ND]. *@li x2: A Tensor of the same type as "x1". *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DivNoNan. */ REG_OP(DivNoNan) .INPUT(x1, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16, - DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + DT_DOUBLE})) .INPUT(x2, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16, - DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + DT_DOUBLE})) .OUTPUT(y, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16, - DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) + DT_DOUBLE})) .OP_END_FACTORY_REG(DivNoNan) /** @@ -1192,10 +1386,15 @@ REG_OP(DivNoNan) *@par Inputs: * One input: \n -*x: A Tensor of type int16 or uint16. Up to 8D. 
+*x: A Tensor. Must be one of the following types: +* int32, uint8, int16, int8, int64, uint16, uint32, uint64. +* The format can be [NCHW,NC1HWC0,NHWC,ND]. *@par Outputs: *y: A Tensor. Has the same type and format as "x" + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Invert. */ REG_OP(Invert) .INPUT(x, TensorType::IntegerDataType()) @@ -1206,10 +1405,14 @@ REG_OP(Invert) *@brief Returns a tensor of the same shape and type with all elements set to one. *@par Inputs: *One input: \n -*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, int16, uint16, int32, int64, complex128, bool. +*x: A Tensor. Must be one of the following types: float16, float32, int8, uint8, + * int16, uint16, int32, int64, complex128, bool. *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator OnesLike. */ REG_OP(OnesLike) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, @@ -1224,14 +1427,20 @@ REG_OP(OnesLike) *@brief Computes the gradient for the inverse of "x" with regard its input. *@par Inputs: -*@li input_y: A Tensor. Must be one of the following types: float16, float32, int8, int32. -*@li input_dy: A Tensor. Must be one of the following types: float16, float32, int8, int32. +*@li input_y: A Tensor. Must be one of the following types: float, double, + * complex64, complex128, half. +*@li input_dy: A Tensor. Must be one of the following types: float, double, + * complex64, complex128, half. -*@par Outputs:\n -*output_data: A Tensor. Must be one of the following types: float16, float32, int8, int32. +*@par Outputs: +*output_data: A Tensor. Must be one of the following types: float, double, + * complex64, complex128, half. -*@attention Constraints:\n +*@attention Constraints: * "input_dy" has the same shape and type as "input_y". 
+ +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator reciprocal_grad. */ REG_OP(ReciprocalGrad) .INPUT(y, TensorType::UnaryDataType()) @@ -1242,16 +1451,20 @@ REG_OP(ReciprocalGrad) /** *@brief Returns the truth value of (x1 > x2) element-wise. -*@par Inputs:\n -*@li x1: A Tensor of type float16, float32, int32, int8, or uint8. - +*@par Inputs: +*@li x1: A Tensor of type float16, float32, double, int64, int32, int16, int8, +* uint8, uint16, uint32, uint64. *@li x2: A Tensor of the same data type as "x1". -*@par Outputs:\n +*@par Outputs: *y: A Tensor of type bool. -*@attention Constraints:\n +*@attention Constraints: * Broadcasting is supported. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Greater. + */ REG_OP(Greater) .INPUT(x1, TensorType::RealNumberType()) @@ -1262,14 +1475,20 @@ REG_OP(Greater) /** *@brief Returns a tensor of the same type and shape as the input tensor with all elements set to zero. -*@par Inputs:\n -*x: A Tensor. Must be one of the following types: float16, float32, int8, int32, uint8. +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. -*@par Outputs:\n -*y: A Tensor. Must be one of the following types: float16, float32, int8, int32, uint8. +*@par Outputs: +*y: A Tensor of the same data type as "x". -*@attention Constraints:\n +*@attention Constraints: * The output has the same shape and type as the input. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator zeros_like. */ REG_OP(ZerosLike) .INPUT(x, TensorType::BasicType()) @@ -1279,14 +1498,17 @@ REG_OP(ZerosLike) /** *@brief Returns the truth value of NOT "x" element-wise. -*@par Inputs:\n -*x: A Tensor of type int8. +*@par Inputs: +*x: A Tensor of type bool. 
-*@par Outputs:\n -*y: A Tensor of type int8. +*@par Outputs: +*y: A Tensor of type bool. -*@attention Constraints:\n +*@attention Constraints: * The input and output values are "1" or "0", corresponding to bool values "true" and "false". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator logical_not. */ REG_OP(LogicalNot) .INPUT(x, TensorType({DT_BOOL})) @@ -1294,7 +1516,7 @@ REG_OP(LogicalNot) .OP_END_FACTORY_REG(LogicalNot) /** -*@brief Computes inverse hyperbolic sine of x element-wise.\n +*@brief Computes inverse hyperbolic sine of x element-wise. * Given an input tensor, this function computes inverse hyperbolic sine for every element in the tensor. * @@ -1304,6 +1526,9 @@ REG_OP(LogicalNot) *@par Outputs: * y: A tensor. Has the same type as "x". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Asinh. +* */ REG_OP(Asinh) .INPUT(x, TensorType::UnaryDataType()) @@ -1315,12 +1540,15 @@ REG_OP(Asinh) * *@par Inputs: -*@li y: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. +*@li y: A tensor. Must be one of the following types: float16, float32. *@li dy: A tensor of the same type as "y" * *@par Outputs: * z: A tensor. Has the same type as "y". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AsinhGrad. +* */ REG_OP(AsinhGrad) .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1339,6 +1567,9 @@ REG_OP(AsinhGrad) *@par Outputs: * y: A tensor. Has the same type as "x". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Atanh. +* */ REG_OP(Atanh) .INPUT(x, TensorType::UnaryDataType()) @@ -1346,16 +1577,19 @@ REG_OP(Atanh) .OP_END_FACTORY_REG(Atanh) /** -*@brief Computes the trignometric inverse tangent of x element-wise.\n +*@brief Computes the trignometric inverse tangent of x element-wise. 
* The atan operation returns the inverse of tan, such that if y = tan(x) then, x = atan(y). * *@par Inputs: -* x: A tensor. Must be one of the following types: float16, float32, float64, int32, int64, complex64, complex128. +* x: A tensor. Must be one of the following types: float16, float32, float64, complex64, complex128. * *@par Outputs: * y: A tensor. Has the same type as "x". The output of atan will lie within the invertible range of tan, i.e (-pi/2, pi/2). * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Atan. +* */ REG_OP(Atan) .INPUT(x, TensorType::UnaryDataType()) @@ -1373,6 +1607,9 @@ REG_OP(Atan) *@par Outputs: * z: A tensor. Has the same type as "y". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AtanGrad. +* */ REG_OP(AtanGrad) .INPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1391,6 +1628,9 @@ REG_OP(AtanGrad) *@par Outputs: * y: A tensor. Has the same type as "x1". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Atan2. +* */ REG_OP(Atan2) .INPUT(x1, TensorType::FloatingDataType()) @@ -1412,6 +1652,9 @@ REG_OP(Atan2) *@par Outputs: * y: A tensor of type bool. * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApproximateEqual. +* */ REG_OP(ApproximateEqual) .INPUT(x1, TensorType::NumberType()) @@ -1426,6 +1669,7 @@ REG_OP(ApproximateEqual) * *@par Inputs: +*Dynamic inputs, including: * x: A tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, \n qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. * *@par Outputs: @@ -1433,6 +1677,9 @@ REG_OP(ApproximateEqual) * *@par Attributes: * N: the size of x. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator AccumulateNV2. 
+* */ REG_OP(AccumulateNV2) .DYNAMIC_INPUT(x, TensorType::NumberType()) @@ -1441,7 +1688,7 @@ REG_OP(AccumulateNV2) .OP_END_FACTORY_REG(AccumulateNV2) /** -*@brief Fake-quantizes the input Tensor, type float to output a Tensor of same type. \n +*@brief Fake-quantizes the input Tensor, type float to output a Tensor of same type. * [min, max] define the clamping range for the "inputs" data.\n * the values of "x" are quantized into the quantization range ([0, 2^num_bits - 1] \n * when "narrow_range" is "false" or [1, 2^num_bits - 1] when it is "true") and \n @@ -1450,17 +1697,20 @@ REG_OP(AccumulateNV2) * Quantization is called fake since the output is still in floating point. \n *@par Inputs: -*One input: \n +*One input: *x: A Tensor of type float32. *@par Attributes: -*@li min: An optional attribute. Defaults to "-6". -*@li max: An optional attribute. Defaults to "6". +*@li min: An optional attribute. Defaults to "-6.0". +*@li max: An optional attribute. Defaults to "6.0". *@li num_bits: An optional attribute. Defaults to "8". *@li narrow_range: An optional bool. Defaults to "false". *@par Outputs: *y: A Tensor. Has the same shape and type of "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator FakeQuantWithMinMaxArgs. */ REG_OP(FakeQuantWithMinMaxArgs) .INPUT(x, TensorType({DT_FLOAT})) @@ -1481,13 +1731,16 @@ REG_OP(FakeQuantWithMinMaxArgs) * This is the input Tensor of the FakeQuantWithMinMaxArgs operator.\n *@par Attributes: -*@li min: An optional attribute. Defaults to "-6". -*@li max: An optional attribute. Defaults to "6". +*@li min: An optional attribute. Defaults to "-6.0". +*@li max: An optional attribute. Defaults to "6.0". *@li num_bits: An optional attribute. Defaults to "8". *@li narrow_range: An optional bool. Defaults to "False". *@par Outputs: *y: A Tensor of type float32. + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator FakeQuantWithMinMaxArgsGradient. 
*/ REG_OP(FakeQuantWithMinMaxArgsGradient) .INPUT(gradients, TensorType({DT_FLOAT})) @@ -1515,6 +1768,9 @@ REG_OP(FakeQuantWithMinMaxArgsGradient) *@par Outputs: *y: A Tensor of type float32. + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator FakeQuantWithMinMaxVars. */ REG_OP(FakeQuantWithMinMaxVars) .INPUT(x, TensorType({DT_FLOAT})) @@ -1551,6 +1807,8 @@ REG_OP(FakeQuantWithMinMaxVars) *@see Region() +*@par Third-party framework compatibility +* Compatible with the operator FakeQuantWithMinMaxVarsGradient. */ REG_OP(FakeQuantWithMinMaxVarsGradient) .INPUT(gradients, TensorType({DT_FLOAT})) @@ -1565,7 +1823,7 @@ REG_OP(FakeQuantWithMinMaxVarsGradient) .OP_END_FACTORY_REG(FakeQuantWithMinMaxVarsGradient) /** -*@brief Fake-quantizes the "inputs" tensor of type float \n +*@brief Fake-quantizes the "inputs" tensor of type float via per-channel floats min and max of shape [d] to "outputs" \n tensor of same shape as inputs @@ -1589,6 +1847,9 @@ tensor of same shape as inputs *@li "num_bits" is between 2 and 16 *@see Region() + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FakeQuantWithMinMaxVarsPerChannel. */ REG_OP(FakeQuantWithMinMaxVarsPerChannel) .INPUT(x, TensorType({DT_FLOAT})) @@ -1625,6 +1886,9 @@ REG_OP(FakeQuantWithMinMaxVarsPerChannel) *@li "num_bits" is between 2 and 16 *@see Region() + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FakeQuantWithMinMaxVarsPerChannelGradient. */ REG_OP(FakeQuantWithMinMaxVarsPerChannelGradient) .INPUT(gradients, TensorType({DT_FLOAT})) @@ -1644,11 +1908,14 @@ REG_OP(FakeQuantWithMinMaxVarsPerChannelGradient) *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: int8, int16, -* int32, int64, uint8, uint16, uint32, uint64. +* int32, int64, uint8, uint16, uint32, uint64. Broadcasting is supported. * @li x2: A Tensor of the same type as "x1". *@par Outputs: *y: A Tensor. 
Has the same type as "x1". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BitwiseAnd. */ REG_OP(BitwiseAnd) .INPUT(x1, TensorType::IntegerDataType()) @@ -1662,11 +1929,14 @@ REG_OP(BitwiseAnd) *@par Inputs: *Two inputs, including: * @li x1: A Tensor. Must be one of the following types: int8, int16, -* int32, int64, uint8, uint16, uint32, uint64. +* int32, int64, uint8, uint16, uint32, uint64. Broadcasting is supported. * @li x2: A Tensor of the same type as "x1". *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BitwiseOr. */ REG_OP(BitwiseOr) .INPUT(x1, TensorType::IntegerDataType()) @@ -1678,13 +1948,16 @@ REG_OP(BitwiseOr) *@brief Elementwise computes the bitwise XOR of "x1" and "x2". *@par Inputs: -*Two inputs, including: \n -*@li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64.\n -* The format is NC1HWC0 or ND. +*Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64. +* The format is NC1HWC0 or ND. Broadcasting is supported. *@li x2: A Tensor. Has the same type and format as "x1". *@par Outputs: *y: Output result. Has the same type as "x1". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator BitwiseXor. */ REG_OP(BitwiseXor) .INPUT(x1, TensorType::IntegerDataType()) @@ -1696,10 +1969,12 @@ REG_OP(BitwiseXor) *@brief Returns element-wise smallest integer not less than "x". *@par Inputs: -* x: A Tensor. TensorType::FloatingDataType(). +* x: A Tensor of type float16 or float32 or float64. *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Ceil. 
*/ REG_OP(Ceil) .INPUT(x, TensorType::FloatingDataType()) @@ -1710,10 +1985,13 @@ REG_OP(Ceil) *@brief Returns element-wise largest integer not greater than "x". *@par Inputs: -*x: A Tensor of type float16 or float32. +*x: A Tensor of type float16, float32 or double. *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator Floor. */ REG_OP(Floor) .INPUT(x, TensorType::FloatingDataType()) @@ -1732,6 +2010,9 @@ REG_OP(Floor) *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FloorDiv. */ REG_OP(FloorDiv) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8, @@ -1750,11 +2031,14 @@ REG_OP(FloorDiv) *@par Inputs: * Two inputs, including: -*@li x1: A Tensor +*@li x1: A Tensor. Must be one of the following types: +* int32, int64, float, float16, double *@li x2: A Tensor. Must have the same type as "x1". * *@par Outputs: *y: Result remainder. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FloorMod. */ REG_OP(FloorMod) .INPUT(x1, TensorType({DT_INT32, DT_INT64, DT_FLOAT, DT_FLOAT16, @@ -1776,6 +2060,9 @@ REG_OP(FloorMod) *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Pow. */ REG_OP(Pow) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_INT8, @@ -1796,6 +2083,9 @@ REG_OP(Pow) *@par Outputs: *y: A mutable Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Rint. */ REG_OP(Rint) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) @@ -1803,13 +2093,17 @@ REG_OP(Rint) .OP_END_FACTORY_REG(Rint) /** -*@brief Rounds the values of a tensor to the nearest integer, element-wise. Rounds half to even. 
+*@brief Rounds the values of a tensor to the nearest integer, element-wise. + * Rounds half to even. *@par Inputs: -*Inputs including: \n -*x: A required Tensor of type float16, float32, or int32. +*Inputs including: +*x: A required ND Tensor of type float16, float, int64, double, complex64, + * complex128 or int32. *@par Outputs: -*y: A required Tensor. Has the same data type and shape as "x". +*y: A required ND Tensor. Has the same data type and shape as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Round. */ REG_OP(Round) .INPUT(x, TensorType(DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, @@ -1823,10 +2117,14 @@ REG_OP(Round) *@par Inputs: *One input: -*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128, int32, int64 +*x: An ND Tensor. Must be one of the following types: float16, float32, double, + * complex64, complex128, int32, int64 *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: An ND Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Sin. */ REG_OP(Sin) .INPUT(x, TensorType::UnaryDataType()) @@ -1842,6 +2140,9 @@ REG_OP(Sin) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Tan. */ REG_OP(Tan) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_COMPLEX64, @@ -1855,11 +2156,15 @@ REG_OP(Tan) *@par Inputs: *Two inputs, including: -* @li x1: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. +* @li x1: A Tensor. Must be one of the following types: float16, float32, +* double, int32, int64. * @li x2: A Tensor of the same type as "x1". *@par Outputs: *y: A Tensor. Has the same type as "x1". + +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator TruncateMod. 
*/ REG_OP(TruncateMod) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, @@ -1875,14 +2180,18 @@ REG_OP(TruncateMod) *@par Inputs: *Two inputs, including: -* @li x: A Tensor of type NumberType. -* @li bias: A 1D Tensor of the same type as "x". +* @li x: A Tensor of type NumberType. Must be one of the following types: float32, float64, int32, uint8, int16, +*int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64. +* @li bias: A 1D Tensor with size the C dimension of value. *@par Attributes: *data_format: An optional string. Defaults to "NHWC". *@par Outputs: *y: A Tensor with same type as "x". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BiasAdd. */ REG_OP(BiasAdd) .INPUT(x, TensorType::NumberType()) @@ -1895,16 +2204,19 @@ REG_OP(BiasAdd) *@brief Returns the index with the smallest value across dimensions of a tensor. *@par Inputs: -*Two inputs, including: \n -*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64.\n +*Two inputs, including: +*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, complex64, int64, qint8, quint8, qint32, bfloat16, uint16, complex128, float16, uint32, uint64. *format is ND. -*@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. Describes which dimension of the input Tensor to reduce across. \n +*@li dimension: A Tensor. Must be one of the following types: int32, int64. Must be in the range [-rank(input x), rank(input x)]. Describes which dimension of the input Tensor to reduce across. * The format is ND. *@par Attributes: *dtype: The output type, either "int32" or "int64". Defaults to "int64". *@par Outputs: *y: A Tensor of type "dtype". 
+ +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ArgMin. */ REG_OP(ArgMin) .INPUT(x, TensorType::NumberType()) @@ -1917,7 +2229,7 @@ REG_OP(ArgMin) *@brief Returns the index with the smallest value across dimensions of a tensor. *@par Inputs: -*One input: \n +*One input: *x: A Tensor of type float16 or float32 in ND format. @@ -1927,6 +2239,9 @@ REG_OP(ArgMin) *@par Outputs: *y: A Tensor of type dtype. + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ArgMin. */ REG_OP(ArgMinD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -1939,16 +2254,22 @@ REG_OP(ArgMinD) *@brief Returns the index with the largest value across axes of a tensor. *@par Inputs: -* Two inputs, including: \n +* Two inputs, including: *@li x: A multi-dimensional Tensor of type float16, float32, or int16. *@li dimension: A Scalar of type int32, specifying the index with the largest value. -*@par Outputs: \n +*@par Attributes: +*dtype: The output type, either "int32" or "int64". Defaults to "int64". + +*@par Outputs: *y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". *@attention Constraints: *@li x: If there are multiple maximum values, the index of the first maximum value is used. *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ArgMax. */ REG_OP(ArgMaxV2) .INPUT(x, TensorType::NumberType()) @@ -1961,18 +2282,22 @@ REG_OP(ArgMaxV2) *@brief Returns the index with the largest value across axes of a tensor. *@par Inputs: -* One input, including: \n +* One input, including: *x: A multi-dimensional Tensor of type float16, float32. *@par Attributes: -*dimension: An integer of type int32, specifying the axis information of the index with the maximum value. 
+*@li dimension: An integer of type int32, specifying the axis information of the index with the maximum value. +*@li dtype: The output type, either "int32" or "int64". Defaults to "int64". -*@par Outputs: \n +*@par Outputs: *y: A multi-dimensional Tensor of type int32, specifying the index with the largest value. The dimension is one less than that of "x". *@attention Constraints: *@li x: If there are multiple maximum values, the index of the first maximum value is used. *@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ArgMax. */ REG_OP(ArgMaxD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -1982,23 +2307,36 @@ REG_OP(ArgMaxD) .OP_END_FACTORY_REG(ArgMaxD) /** -*@brief Returns the maximum value of all elements in the input in the given dimension. +*@brief Returns the maximum value of all elements in the input in the given +* dimension. *@par Inputs: *One input: \n *x: A multi-dimensional Tensor of type float16 or float32. *@par Attributes: -*@li dimension: An integer of type int32, specifying the axis information of the index with the maximum value. -*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". +*@li dimension: An integer of type int32, specifying the axis information of +* the index with the maximum value. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output +* Tensor. Defaults to "false". *@par Outputs: -*@li indice: A multi-dimensional Tensor of type int32, specifying the index. (If "keep_dims" is set to "false", the output dimensions are reduced by "dimension" compared with that of "x". Otherwise, the output has one fewer dimension than "x".) -*@li values: A Tensor, specifying the maximum value. Has the same dimensions as "indice" and the same type as "x". +*@li indice: A multi-dimensional Tensor of type int32, specifying the index. 
+* (If "keep_dims" is set to "false", the output dimensions are reduced by +* "dimension" compared with that of "x". Otherwise, the output has one fewer +* dimension than "x".) +*@li values: A Tensor, specifying the maximum value. Has the same dimensions +* as "indice" and the same type as "x". *@attention Constraints: -*@li If there are multiple maximum values, the index of the first maximum value is used. -*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". +*@li If there are multiple maximum values, the index of the first maximum +* value is used. +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the +* dimension length of "x". + +*@par Third-party framework compatibility +* Compatible with the two output scenarios of PyTorch operator Max (the output +* sequence is opposite to that of PyTorch). */ REG_OP(ArgMaxWithValue) .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) @@ -2014,17 +2352,30 @@ REG_OP(ArgMaxWithValue) *x: A multi-dimensional Tensor of type float16 or float32. *@par Attributes: -*@li dimension: An integer of type int32, specifying the axis information of the index with the maximum value. -*@li keep_dims: A bool, specifying whether to keep dimensions for the output Tensor. Defaults to "false". +*@li dimension: An integer of type int32, specifying the axis information of +* the index with the maximum value. +*@li keep_dims: A bool, specifying whether to keep dimensions for the output +* Tensor. Defaults to "false". *@par Outputs: -*@li indice: A multi-dimensional Tensor of type int32, specifying the index. (If "keep_dims" is set to "false", the output dimensions are reduced by "dimension" compared with that of "x". Otherwise, the output has one fewer dimension than "x".) -*@li values: A Tensor, specifying the minimum value. Has the same dimensions as "indice" and the same type as "x". +*@li indice: A multi-dimensional Tensor of type int32, specifying the index. 
+* (If "keep_dims" is set to "false", the output dimensions are reduced by +* "dimension" compared with that of "x". Otherwise, the output has one fewer +* dimension than "x".) +*@li values: A Tensor, specifying the minimum value. Has the same dimensions +* as "indice" and the same type as "x". *@attention Constraints: -*@li If there are multiple minimum values, the index of the first minimum value is used. -*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the dimension length of "x". -*@li Performing the ArgMinWithValue operation on the last axis of float32 data is not supported on a mini platform. +*@li If there are multiple minimum values, the index of the first minimum +* value is used. +*@li The value range of "dimension" is [-dims, dims - 1]. "dims" is the +* dimension length of "x". +*@li Performing the ArgMinWithValue operation on the last axis of float32 data +* is not supported on a mini platform. + +*@par Third-party framework compatibility +* Compatible with the two output scenarios of PyTorch operator Min (the output +* sequence is opposite to that of PyTorch). */ REG_OP(ArgMinWithValue) .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) @@ -2052,7 +2403,7 @@ REG_OP(ArgMinWithValue) * "0": product, "1": sum, "2": max. *@li coeff: A required attribute. Must met all of following rules: * size of "coeff" must be equal to len("x") or is null. -* the absolute value of “coeff” must less than or equal to 1. +* the absolute value of "coeff" must less than or equal to 1. */ REG_OP(Eltwise) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -2070,6 +2421,9 @@ REG_OP(Eltwise) *@par Outputs: *y: A Tensor of type uint8. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator PopulationCount. */ REG_OP(PopulationCount) .INPUT(x, TensorType::IntegerDataType()) @@ -2434,6 +2788,9 @@ REG_OP(SquareSumAll) *@par Outputs: *@li y: A Tensor. Has the same type as "x1". 
+ +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator LRN. */ REG_OP(FusedMulAddN) .INPUT(x1, TensorType::NumberType()) @@ -2452,8 +2809,8 @@ REG_OP(FusedMulAddN) *@par Attributes: *@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". -*@li num_axes: -*@li bias_from_blob: +*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1". +*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". *@par Outputs: *y: An ND tensor of type float16 or float32. @@ -2468,6 +2825,8 @@ REG_OP(FusedMulAddN) * If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). *@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n * If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). +*@par Third-party framework compatibility +* Compatible with the Caffe operator Bias. */ REG_OP(Bias) @@ -2509,6 +2868,8 @@ REG_OP(FusedMulAddNL2loss) *@par Outputs: *@li y: A Tensor with any format. Has the same type as the input. Must be one of the following types: float16, float32. +*@par Third-party framework compatibility +* Compatible with the Caffe operator Threshold. */ REG_OP(Threshold) @@ -2517,6 +2878,25 @@ REG_OP(FusedMulAddNL2loss) .ATTR(threshold, Float, 0.0) .OP_END_FACTORY_REG(Threshold); +/** +*@brief Returns the index number corresponding to the maximum value entered. + +*@par Inputs: +*@li x: A tensor. 
Must be one of the following types: float16, float32. + +*@par Attributes: +*@li axis: An optional int. Specify the axis to be cut at the input tensor. If this parameter is not provided, find the topk for each batch. Defaults to 10000 +*@li out_max_val: An optional bool. Whether to output the maximum value. If it is True, the maximum value and index are output, otherwise only the index is output. +* Defaults to False +*@li topk: An optional int. It means the number of top-k elements in each axis (the value is greater than or equal to 1), and the value range must be in [1,x.shape(axis)]. +* Defaults to 1 + +*@par Outputs: +*@li indices: A tensor of type float16, float32, int32. The index of the maximum value of the output. +*@li values: A tensor of type float16, float32. Output tensor, including maximum index or maximum value. +*@par Third-party framework compatibility +* Compatible with the Caffe operator ArgMax. +*/ REG_OP(ArgMaxWithK) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(indices, TensorType({DT_INT32, DT_FLOAT, DT_FLOAT16})) @@ -2537,6 +2917,8 @@ REG_OP(ArgMaxWithK) *@par Outputs: *@li y: A Tensor. Has the same type and shape as "x1". +*@par Third-party framework compatibility +* Compatible with the PyTorch operator muls. */ REG_OP(Muls) .INPUT(x, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) @@ -2555,6 +2937,8 @@ REG_OP(Muls) *@par Outputs: *@li y: A Tensor. Has the same type and shape as "x1". +*@par Third-party framework compatibility +* Compatible with the PyTorch operator fills. */ REG_OP(Fills) .INPUT(x, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) @@ -2573,6 +2957,8 @@ REG_OP(Fills) *@par Outputs: *@li y: A Tensor. Has the same type and shape as "x1". +*@par Third-party framework compatibility +* Compatible with the PyTorch operator adds. 
*/ REG_OP(Adds) .INPUT(x, TensorType({DT_FLOAT,DT_INT16,DT_INT32,DT_FLOAT16})) @@ -2580,6 +2966,19 @@ REG_OP(Fills) .REQUIRED_ATTR(value,Float) .OP_END_FACTORY_REG(Adds) +/** +*@brief Computes the product of "x1" and "x2" and returns 0 if "x2" is zero, even if "x1" is NaN or infinite. + +*@par Inputs: +* @li x1: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128. +* @li x2: A Tensor. Has the same type and shape as "x1". + +*@par Outputs: +*y: A Tensor. Has the same type and shape as "x1". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MulNoNan. +*/ REG_OP(MulNoNan) .INPUT(x1, TensorType::NumberType()) /* "First operand." */ .INPUT(x2, TensorType::NumberType()) /* "Second operand." */ @@ -2607,6 +3006,8 @@ REG_OP(Axpy) *@par Outputs: *@li y: A ND Tensor with Must be float32. +*@par Third-party framework compatibility +* Compatible with the PyTorch operator CosineEmbeddingLoss. */ REG_OP(CosineEmbeddingLoss) .INPUT(x1, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) @@ -2617,6 +3018,29 @@ REG_OP(CosineEmbeddingLoss) .OUTPUT(y, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(CosineEmbeddingLoss) +/** +*@brief Kullback-Leibler divergence. + +*@par Inputs: +*@li x: Tensor of arbitrary shape. +*@li target: Tensor of the same shape and dtype as x. + +*@par Attributes: +*reduction: A required string. Specifies the reduction to apply to the output. +* Reduction only supports the two modes of "sum" and "batchmean". + +*@par Outputs: +*y: An ND Tensor of the same dtype as x. +*@par Third-party framework compatibility +*Compatible with the PyTorch operator kl_div. 
+*/ +REG_OP(KLDiv) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(reduction, String) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OP_END_FACTORY_REG(KLDiv) + } // namespace ge diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index aaad03c6..9b3694f1 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -37,6 +37,8 @@ interpretted as channels, and must be three. Inputs include: \n *Input images is a tensor of at least 3 dimensions. The last dimension is \n interpretted as channels, and must be three. +*@par Third-party framework compatibility +*Compatible with tensorflow AdjustHue operator. */ REG_OP(AdjustHue) @@ -61,6 +63,8 @@ interpretted as channels, and must be three. Inputs include: \n *Input images is a tensor of at least 3 dimensions. The last dimension is \n interpretted as channels, and must be three. +*@par Third-party framework compatibility +*Compatible with tensorflow AdjustSaturation operator. */ REG_OP(AdjustSaturation) @@ -85,6 +89,8 @@ interpreted as '[height, width, channels]'. Inputs include: \n *Input images is a tensor of at least 3 dimensions. The last dimension is \n interpretted as channels, and must be three. +*@par Third-party framework compatibility +*Compatible with tensorflow AdjustContrast operator. */ REG_OP(AdjustContrast) @@ -122,6 +128,8 @@ NearestNeighbor. *@attention Constraints: \n *Input images must be a 4-D tensor. +*@par Third-party framework compatibility +*Compatible with tensorflow CropAndResize operator. */ REG_OP(CropAndResize) @@ -161,6 +169,8 @@ supported for now. *@attention Constraints: \n *Input images and grads must be a 4-D tensor. +*@par Third-party framework compatibility +*Compatible with tensorflow CropAndResizeGradBoxes operator. 
*/ REG_OP(CropAndResizeGradBoxes) @@ -200,6 +210,8 @@ supported for now. *@attention Constraints: \n *Input grads must be a 4-D tensor. +*@par Third-party framework compatibility +*Compatible with tensorflow CropAndResizeGradImage operator. */ REG_OP(CropAndResizeGradImage) @@ -243,6 +255,8 @@ glimpse_width, channels]. *@attention Constraints: \n *Input x must be a 4-D tensor. +*@par Third-party framework compatibility +*Compatible with tensorflow ExtractGlimpse operator. */ REG_OP(ExtractGlimpse) @@ -269,6 +283,8 @@ REG_OP(ExtractGlimpse) *@attention Constraints: \n *Last dimension of input x must be size 3. +*@par Third-party framework compatibility +*Compatible with tensorflow HSVToRGB operator. */ REG_OP(HSVToRGB) @@ -301,6 +317,8 @@ the values at the corner pixels. Defaults to false. *@attention Constraints: \n *Input images and output images must be quantized types. +*@par Third-party framework compatibility +*Compatible with tensorflow QuantizedResizeBilinear operator. */ REG_OP(QuantizedResizeBilinear) @@ -335,6 +353,8 @@ Defaults to false. *@attention Constraints: \n *Input images can be of different types but output images are always float. +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeArea operator. */ REG_OP(ResizeArea) @@ -368,6 +388,8 @@ false. *@attention Constraints: \n *Input images can be of different types but output images are always float. +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBicubicGrad operator. */ REG_OP(ResizeBicubicGrad) @@ -399,6 +421,8 @@ Defaults to false. *@attention Constraints: \n *Input images can be of different types but output images are always float. +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBicubic operator. */ REG_OP(ResizeBicubic) @@ -431,6 +455,9 @@ false. *@attention Constraints: \n *Input grads must be a 4-D tensor. 
+ +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeNearestNeighborV2Grad operator. */ REG_OP(ResizeNearestNeighborV2Grad) @@ -460,6 +487,8 @@ false. *@par Outputs: *y: A Tensor. Has the same type as grads. +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeNearestNeighborV2GradD operator. */ REG_OP(ResizeNearestNeighborV2GradD) @@ -490,6 +519,9 @@ false. *@attention Constraints: \n *Input grads must be a 4-D tensor. + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBilinearV2Grad operator. */ REG_OP(ResizeBilinearV2Grad) @@ -519,6 +551,9 @@ Defaults to false. *@attention Constraints: \n *Input images can be of different types but output images are always float. + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeBilinearV2 operator. */ REG_OP(ResizeBilinearV2) @@ -546,6 +581,8 @@ higher rank. RGB data to convert. Last dimension must be size 3. value of the pixels. The output is only well defined if the value in images \n are in [0,1]. +*@par Third-party framework compatibility +*Compatible with tensorflow RGBToHSV operator. */ REG_OP(RGBToHSV) @@ -587,6 +624,8 @@ If false, raise an error. *@attention Constraints: \n *Input images can be of different types but output images are always float. +*@par Third-party framework compatibility +*Compatible with tensorflow SampleDistortedBoundingBoxExt2 operator. */ REG_OP(SampleDistortedBoundingBoxExt2) @@ -623,6 +662,9 @@ Defaults to false. *@par Outputs: *y: 4-D with shape [batch, new_height, new_width, channels]. + +*@par Third-party framework compatibility +*Compatible with tensorflow ResizeNearestNeighborV2 operator. */ REG_OP(ResizeNearestNeighborV2) @@ -651,6 +693,8 @@ num_bounding_boxes, 4] containing bounding boxes. *@attention Constraints: \n *Input images must be a 4-D tensor. +*@par Third-party framework compatibility +*Compatible with tensorflow DrawBoundingBoxes operator. 
*/ REG_OP(DrawBoundingBoxes) @@ -682,6 +726,8 @@ indices from the boxes tensor, where M <= max_output_size. *@attention Constraints: \n *Input boxes and scores must be float type. +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppression operator. */ REG_OP(NonMaxSuppression) @@ -713,6 +759,8 @@ indices from the boxes tensor, where M <= max_output_size. *@attention Constraints: \n *Input boxes and scores must be float type. +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppressionV2 operator. */ REG_OP(NonMaxSuppressionV2) @@ -746,6 +794,8 @@ indices from the boxes tensor, where M <= max_output_size. *@attention Constraints: \n *Input boxes and scores must be float type. +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppressionV3 operator. */ REG_OP(NonMaxSuppressionV3) @@ -786,6 +836,8 @@ elements in selected_indices, with the valid elements appearing first. *@attention Constraints: \n *Input boxes and scores must be float type. +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppressionV4 operator. */ REG_OP(NonMaxSuppressionV4) @@ -824,6 +876,8 @@ to be of length max_output_size. Defaults to false. *selected_indices: A 1-D integer tensor of shape [M] representing the \n selected indices from the boxes tensor, where M <= max_output_size. +*@par Third-party framework compatibility +*Compatible with tensorflow NonMaxSuppressionWithOverlaps operator. */ REG_OP(NonMaxSuppressionWithOverlaps) @@ -860,6 +914,8 @@ inch ('in') or centimeter ('cm'). *@par Outputs: *contents: 0-D. JPEG-encoded image. +*@par Third-party framework compatibility +*Compatible with tensorflow EncodeJpeg operator. */ REG_OP(EncodeJpeg) @@ -891,6 +947,8 @@ where channels is: 1: for grayscale; 2: for grayscale + alpha; 3: for RGB; \n *@par Outputs: *contents: 0-D. PNG-encoded image. +*@par Third-party framework compatibility +*Compatible with tensorflow EncodePng operator. 
*/ REG_OP(EncodePng) @@ -921,6 +979,9 @@ pixels of the input and output tensors are aligned. Defaults to "false". size[1] <= 2048. *@li The input "images" must be a tensor of 5 elements: images[2] <= 2048, \n images[3] <= 2048. + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ResizeBilinearV2D. */ REG_OP(ResizeBilinearV2D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -950,6 +1011,9 @@ pixels of the input and output tensors are aligned. Defaults to "false". *@attention Constraints: * The input "size" must be a tensor of 2 elements: size[0] <= 7680, \n size[1] <= 4320 + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ResizeNearestNeighborV2. */ REG_OP(ResizeNearestNeighborV2D) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -972,6 +1036,9 @@ to int32. *@par Outputs: *image_shape: 1-D. The image shape with format [height, width, channels]. + +*@par Third-party framework compatibility +*Compatible with tensorflow ExtractJpegShape operator. */ REG_OP(ExtractJpegShape) @@ -993,6 +1060,9 @@ containing bounding boxes. *@par Outputs: *y: Returns 4-D with the same shape as `images`. \n The batch of input images with bounding boxes drawn on the images. + +*@par Third-party framework compatibility +* Compatible with tensorflow DrawBoundingBoxesV2 operator. */ REG_OP(DrawBoundingBoxesV2) @@ -1030,6 +1100,9 @@ selected indices from the boxes tensor, where M <= max_output_size. scores for each selected box, where `M <= max_output_size`. *@li valid_outputs: A 0-D integer tensor representing the number of valid \n elements in selected_indices, with the valid elements appearing first. + +*@par Third-party framework compatibility +* Compatible with tensorflow NonMaxSuppressionV5 operator. */ REG_OP(NonMaxSuppressionV5) @@ -1058,6 +1131,9 @@ REG_OP(NonMaxSuppressionV5) *@par Outputs: *y: A Tensor with type float32. 
+ +*@par Third-party framework compatibility +* Compatible with TensorFlow ScaleAndTranslate operator. */ REG_OP(ScaleAndTranslate) @@ -1082,6 +1158,9 @@ REG_OP(ScaleAndTranslate) *@par Outputs: *y: A `Tensor`. Has the same type as `grads`. + +*@par Third-party framework compatibility +* Compatible with TensorFlow ScaleAndTranslateGrad operator. */ REG_OP(ScaleAndTranslateGrad) @@ -1126,6 +1205,8 @@ coordinates as it is. If not specified, defaults to true. *y: A 1-D integer tensor of shape `[M]` representing the selected \n indices from the boxes tensor, where `M <= max_output_size`. +*@par Third-party framework compatibility +* Compatible with tensorflow CombinedNonMaxSuppression operator. */ REG_OP(CombinedNonMaxSuppression) diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h index 985af4eb..916c3267 100644 --- a/third_party/fwkacllib/inc/ops/linalg_ops.h +++ b/third_party/fwkacllib/inc/ops/linalg_ops.h @@ -42,6 +42,8 @@ triangular part of the innermost matrices of this tensor. *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow CholeskyGrad operator. */ REG_OP(CholeskyGrad) @@ -65,6 +67,8 @@ is [..., M, M]. *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow Cholesky operator. */ REG_OP(Cholesky) @@ -90,6 +94,8 @@ form square matrices. Inputs include: \n *The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions \n form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow LogMatrixDeterminant operator. */ REG_OP(LogMatrixDeterminant) @@ -114,6 +120,8 @@ form square matrices. Inputs include: \n *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n form square matrices. 
\n +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixDeterminant operator. */ REG_OP(MatrixDeterminant) @@ -142,6 +150,8 @@ deal with matrix or its (block-wise) adjoint. *The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions \n form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixInverse operator. */ REG_OP(MatrixInverse) @@ -170,6 +180,8 @@ solve with matrix or its (block-wise) adjoint. *The input matrix is a tensor of shape [..., M, M] whose inner-most 2 \n dimensions form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixSolve operator. */ REG_OP(MatrixSolve) @@ -200,6 +212,8 @@ rhs[..., :, :] in the least squares sense. *The input matrix matrix is a tensor of shape [..., M, M] whose inner-most 2 \n dimensions form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixSolveLs operator. */ REG_OP(MatrixSolveLs) @@ -233,6 +247,8 @@ with matrix or its (block-wise) adjoint. *The input matrix is a tensor of shape [..., M, M] whose inner-most 2 \n dimensions form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow MatrixTriangularSolve operator. */ REG_OP(MatrixTriangularSolve) @@ -264,6 +280,8 @@ columns of q. *The input matrix x is a tensor of shape [..., M, N] whose inner-most 2 \n dimensions form matrices of size [M, N]. \n +*@par Third-party framework compatibility +*Compatible with tensorflow Qr operator. */ REG_OP(Qr) @@ -293,6 +311,8 @@ contain eigenvectors of the corresponding matrices in tensor *The input x is a tensor of shape [..., N, N] whose inner-most 2 dimensions \n form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow SelfAdjointEig operator. */ REG_OP(SelfAdjointEig) @@ -329,6 +349,8 @@ returned if compute_uv is False. 
*The input x is a tensor of shape [..., N, N] whose inner-most 2 dimensions \n form square matrices. \n +*@par Third-party framework compatibility +*Compatible with tensorflow Svd operator */ REG_OP(Svd) @@ -353,6 +375,8 @@ denotes the lower triangular factor `L` with unit diagonal. *@li p: upper triangular part denotes the upper triangular factor `U`.Permutation \n of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]`. +*@par Third-party framework compatibility +* Compatible with TensorFlow Lu operator. */ REG_OP(Lu) @@ -371,6 +395,8 @@ REG_OP(Lu) *@par Outputs: y: Shape is `[..., M, M]`. +*@par Third-party framework compatibility +* Compatible with TensorFlow MatrixSquareRoot operator. */ REG_OP(MatrixSquareRoot) @@ -392,6 +418,8 @@ left-hand side. *@par Outputs: y: Tensor of shape `[..., M, K]` containing the solutions +*@par Third-party framework compatibility +* Compatible with TensorFlow TridiagonalSolve operator. */ REG_OP(TridiagonalSolve) diff --git a/third_party/fwkacllib/inc/ops/logging_ops.h b/third_party/fwkacllib/inc/ops/logging_ops.h index 2564282d..897fc699 100644 --- a/third_party/fwkacllib/inc/ops/logging_ops.h +++ b/third_party/fwkacllib/inc/ops/logging_ops.h @@ -33,6 +33,8 @@ the Unix epoch. *The timestamp is computed when the op is executed, not when it is added to \n the graph. +*@par Third-party framework compatibility +*Compatible with tensorflow Timestamp operator. */ REG_OP(Timestamp) @@ -51,6 +53,8 @@ Inputs include: \n *@par Attributes: *summarize: Print this many entries of each tensor. +*@par Third-party framework compatibility +*Compatible with tensorflow Assert operator. */ REG_OP(Assert) @@ -67,6 +71,7 @@ REG_OP(Assert) *@par Inputs: *x: The tensor to print, it is a dynamic_input. +*Compatible with aicpu Print operator. 
*/ REG_OP(Print) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, DT_INT32, @@ -84,6 +89,8 @@ REG_OP(Print) *output_stream: A string specifying the output stream or logging level \n to print to. +*@par Third-party framework compatibility +*Compatible with tensorflow PrintV2 operator. */ REG_OP(PrintV2) .INPUT(x, TensorType({DT_STRING})) diff --git a/third_party/fwkacllib/inc/ops/lookup_ops.h b/third_party/fwkacllib/inc/ops/lookup_ops.h index 390e50c6..4dd87a8e 100644 --- a/third_party/fwkacllib/inc/ops/lookup_ops.h +++ b/third_party/fwkacllib/inc/ops/lookup_ops.h @@ -30,6 +30,8 @@ namespace ge { *@li keys: A Tensor. Any shape. Keys to look up. *@li values: A Tensor. Values to associate with keys. +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableImport operator. */ REG_OP(LookupTableImport) @@ -52,6 +54,8 @@ REG_OP(LookupTableImport) *@li The tensor keys must be of the same type as the keys of the table. \n *@li The tensor values must be of the type of the table values. \n +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableInsert operator. */ REG_OP(LookupTableInsert) @@ -76,6 +80,8 @@ REG_OP(LookupTableInsert) *@li keys: A Tensor of type Tkeys. *@li values: A Tensor of type Tvalues. +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableExport operator. */ REG_OP(LookupTableExport) @@ -97,6 +103,8 @@ REG_OP(LookupTableExport) *@par Outputs: *size: A Tensor of type int64. +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableSize operator. */ REG_OP(LookupTableSize) @@ -119,6 +127,8 @@ REG_OP(LookupTableSize) *@par Outputs: *values: A Tensor. Has the same type as default_value. +*@par Third-party framework compatibility. +*Compatible with tensorflow LookupTableFind operator. */ REG_OP(LookupTableFind) @@ -150,6 +160,8 @@ shared_name is empty, the table is shared using the node name. 
*@attention Constraints: \n *The implementation for HashTable on Ascend uses ai cpu, with bad performance. \n +*@par Third-party framework compatibility. +*Compatible with tensorflow HashTable operator. */ REG_OP(HashTable) @@ -172,6 +184,8 @@ initialized. *@li keys: A Tensor. Keys of type Tkey. *@li values: A Tensor. Values of type Tval. +*@par Third-party framework compatibility. +*Compatible with tensorflow InitializeTable operator. */ REG_OP(InitializeTable) @@ -209,6 +223,8 @@ Must be between 0 and 1. *@par Outputs: *handle: A Tensor of type resource. Handle to the table. +*@par Third-party framework compatibility. +*Compatible with tensorflow MutableDenseHashTable operator. */ REG_OP(MutableDenseHashTable) @@ -241,6 +257,8 @@ shared_name is empty, the table is shared using the node name. *@par Outputs: *handle: A Tensor of type resource. Handle to the table. +*@par Third-party framework compatibility. +*Compatible with tensorflow MutableHashTableOfTensors operator. */ REG_OP(MutableHashTableOfTensors) @@ -269,6 +287,8 @@ shared_name is empty, the table is shared using the node name. *@par Outputs: *handle: A Tensor of type resource. Handle to the table. +*@par Third-party framework compatibility. +*Compatible with tensorflow MutableHashTable operator. */ REG_OP(MutableHashTable) diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index 0bee7097..5d34804c 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -35,6 +35,8 @@ namespace ge { *@par Outputs: * y: A Tensor. Has the same type and shape as "x". +*@par Third-party framework compatibility +* Compatible with the Caffe operator Power. */ REG_OP(Power) @@ -56,6 +58,8 @@ REG_OP(Power) *@par Outputs: *z:A Tensor. Has the same type as a. +*@par Third-party framework compatibility. +*Compatible with tensorflow Igamma operator. */ REG_OP(Igamma) @@ -75,6 +79,8 @@ REG_OP(Igamma) *@par Outputs: *z:A Tensor. 
Has the same type as a. +*@par Third-party framework compatibility. +*Compatible with tensorflow Igammac operator. */ REG_OP(Igammac) @@ -98,6 +104,8 @@ a uint8. *@attention Constraints: \n *Currently, the innermost dimension of the tensor must be divisible by 8. \n +*@par Third-party framework compatibility +*Compatible with tensorflow CompareAndBitpack operator */ REG_OP(CompareAndBitpack) @@ -127,6 +135,8 @@ equal to 1. *bins:1D Tensor with length equal to size. The counts or summed weights for \n each value in the range [0, size). +*@par Third-party framework compatibility +*Compatible with tensorflow Bincount operator */ REG_OP(Bincount) @@ -148,6 +158,8 @@ REG_OP(Bincount) *@par Outputs: *z:A Tensor. Has the same type as a. +*@par Third-party framework compatibility. +*Compatible with tensorflow Betainc operator. */ REG_OP(Betainc) @@ -171,6 +183,8 @@ REG_OP(Betainc) *@attention Constraints: \n *The implementation for Zeta on Ascend uses ai cpu, with bad performance. \n +*@par Third-party framework compatibility. +*Compatible with tensorflow Zeta operator. */ REG_OP(Zeta) @@ -194,6 +208,8 @@ the output will be output = [[0, 3] [3, 2] [1, 3]] *@par Outputs: *y:Same shape with 'input', each value of input replaced with bucket index. +*@par Third-party framework compatibility. +*Compatible with tensorflow Bucketize operator. */ REG_OP(Bucketize) @@ -217,6 +233,8 @@ sorted and can be repeated. *@par Outputs: *y:A Tensor. Has the same type as x. +*@par Third-party framework compatibility +*Compatible with tensorflow SparseSegmentSum operator */ REG_OP(SparseSegmentSum) @@ -242,6 +260,8 @@ sorted and can be repeated. *@par Outputs: *y:A Tensor. Has the same type as x. +*@par Third-party framework compatibility +*Compatible with tensorflow SparseSegmentMean operator */ REG_OP(SparseSegmentMean) @@ -268,6 +288,8 @@ SparseSegmentMean op. *@par Outputs: *y:A Tensor. Has the same type as grad. 
+*@par Third-party framework compatibility +*Compatible with tensorflow SparseSegmentMeanGrad operator */ REG_OP(SparseSegmentMeanGrad) @@ -289,6 +311,8 @@ REG_OP(SparseSegmentMeanGrad) *@par Outputs: *y:A Tensor. Has the same type as a. +*@par Third-party framework compatibility +*Compatible with tensorflow IgammaGradA operator */ REG_OP(IgammaGradA) @@ -303,6 +327,8 @@ REG_OP(IgammaGradA) *@par Attributes: *channel_name: A string. Default "". +*@par Third-party framework compatibility +*Compatible with tensorflow InitData operator */ REG_OP(InitData) @@ -322,6 +348,8 @@ to each component of an element of this dataset. *@par Outputs: *y:A nested structure of Tensor objects. +*@par Third-party framework compatibility +*Compatible with tensorflow GetNext operator */ REG_OP(GetNext) @@ -351,12 +379,15 @@ REG_OP(EndOfSequence) /** *@brief: Computes the Gauss error function of `x` element-wise. -*@par Inputs:\n -*x: A Tensor of type float16 or float32. +*@par Inputs: +*x: A Tensor of type float16, float32 or double. the format can be +* [NCHW,NC1HWC0,NHWC,ND] *@par Outputs: -*y: A Tensor. Has the same type as "x". +*y: A Tensor. Has the same type and format as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Erf. */ REG_OP(Erf) .INPUT(x, TensorType::FloatingDataType()) @@ -366,12 +397,14 @@ REG_OP(Erf) /** *@brief: Computes the Gauss complementary error function of "x" element-wise. -*@par Inputs:\n -*x: A Tensor of type float16 or float32. +*@par Inputs: +*x: A Tensor of type float16 ,float32, double. *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Erfc. 
*/ REG_OP(Erfc) .INPUT(x, TensorType::FloatingDataType()) @@ -379,40 +412,42 @@ REG_OP(Erfc) .OP_END_FACTORY_REG(Erfc) /** -*@brief This operation returns a rank 1 histogram counting the number of entries in `values` \n -* that fell into every bin.The bins are equal width and determined by the arguments \n -* 'value_range' and 'nbins'. \n +*@brief This operation returns a rank 1 histogram counting the number of entries in `values` +* that fell into every bin.The bins are equal width and determined by the arguments +* 'value_range' and 'nbins'. *@par Inputs: *Three inputs, including: \n -*@li x: A Tensor of type float32,float16,int32. -*@li range: A Tensor of type float32,float16,int32. +*@li x: A Tensor of type float32, float16, int32, int64. +*@li range: A Tensor of type float32,float16,int32, int64. *@li nbins: A Tensor of type int32. *@par Attributes: * dtype: An optional attribute. Defaults to "int32". *@par Outputs: -*y: A Tensor. A Tensor of type int32. +*y: A Tensor. A Tensor of type int32 or int64. +*@par Third-party framework compatibility +* Compatible with TensorFlow operator HistogramFixedWidth. */ REG_OP(HistogramFixedWidth) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .INPUT(nbins, TensorType({DT_INT32})) .OUTPUT(y, TensorType({DT_INT32})) .ATTR(dtype, String, "int32") .OP_END_FACTORY_REG(HistogramFixedWidth) /** -*@brief This operation returns a rank 1 histogram counting the number of entries in `values` \n -* that fell into every bin.The bins are equal width and determined by the arguments \n -* 'value_range' and 'nbins'. \n +*@brief This operation returns a rank 1 histogram counting the number of entries in `values` +* that fell into every bin.The bins are equal width and determined by the arguments +* 'value_range' and 'nbins'. 
*@par Inputs: *Two inputs, including: \n -*@li x: A Tensor of type float32,float16,int32. -*@li range: A Tensor of type float32,float16,int32. +*@li x: A Tensor of type float32,float16,int32, int64. +*@li range: A Tensor of type float32,float16,int32, int64. *@par Attributes: *@li dtype: An optional attribute. Defaults to "int32". @@ -421,10 +456,12 @@ REG_OP(HistogramFixedWidth) *@par Outputs: *y: A Tensor. A Tensor of type int32. +*@par Third-party framework compatibility +* Compatible with TensorFlow operator HistogramFixedWidth. */ REG_OP(HistogramFixedWidthD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) .OUTPUT(y, TensorType({DT_INT32})) .REQUIRED_ATTR(nbins, Int) .ATTR(dtype, String, "int32") @@ -441,6 +478,8 @@ REG_OP(HistogramFixedWidthD) *@par Outputs: *output:A Tensor. Has the same type as x1. +*@par Third-party framework compatibility +*Compatible with tensorflow NextAfter operator */ REG_OP(NextAfter) .INPUT(x1, TensorType({DT_FLOAT, DT_DOUBLE})) @@ -457,6 +496,8 @@ REG_OP(NextAfter) * *@par Outputs: * *y:A Tensor. Has the same shape as x. * + * *@par Third-party framework compatibility. + * *Compatible with tensorflow IsFinite operator. * */ REG_OP(IsFinite) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) @@ -472,6 +513,8 @@ REG_OP(IsFinite) * *@par Outputs: * *y:A tensor of type `float` or `double` that is the absolute value of each element in `x`. * + * *@par Third-party framework compatibility. + * *Compatible with tensorflow ComplexAbs operator. * */ REG_OP(ComplexAbs) .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128})) @@ -488,6 +531,8 @@ REG_OP(ComplexAbs) * *@par Outputs: * *y:A Tensor. Has the same shape as x. * + * *@par Third-party framework compatibility. + * *Compatible with tensorflow IsNan operator. 
* */ REG_OP(IsNan) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) @@ -503,6 +548,8 @@ REG_OP(IsNan) * *@par Outputs: * *output:A Tensor. Has the same shape as input. * + * *@par Third-party framework compatibility. + * *Compatible with tensorflow Real operator. * */ REG_OP(Real) .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) @@ -519,6 +566,8 @@ REG_OP(Real) * *@par Outputs: * *output:A Tensor. Has the same shape as input. * + * *@par Third-party framework compatibility. + * *Compatible with tensorflow Conj operator. * */ REG_OP(Conj) .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) @@ -542,6 +591,8 @@ REG_OP(Conj) * *@li y: A Tensor. Must be the following type: float32. * *@li total_weight: A Tensor. Must be the type: float32. * + * *@par Third-party framework compatibility + * *Compatible with pytorch NLLLoss operator * */ REG_OP(NLLLoss) .INPUT(x, TensorType({DT_FLOAT})) @@ -570,6 +621,8 @@ REG_OP(NLLLoss) * *One outputs, including: * *@li x_grad: A Tensor. Must be the following type: float32. * + * *@par Third-party framework compatibility + * *Compatible with pytorch NLLLossGrad operator * */ REG_OP(NLLLossGrad) .INPUT(x, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index df4c8359..625b0f85 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -25,12 +25,12 @@ namespace ge { *@brief Multiplies matrix "a" by matrix "b", producing "a * b". *@par Inputs: -*Two inputs, including: +*Three inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, * float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, * float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. -* @li bias: A 1D Tensor. 
Must be one of the following types: float16, +* @li bias: A optional 1D Tensor. Must be one of the following types: float16, * float32, int32. Has format [ND, NHWC]. *@par Attributes: @@ -40,6 +40,9 @@ namespace ge { *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, * float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMul) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -69,6 +72,9 @@ REG_OP(MatMul) *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, * float32, int32. Has format [ND, NHWC, FRACTAL_NZ]. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(MatMulV2) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) @@ -130,6 +136,9 @@ REG_OP(Gemm) *@par Outputs: *y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, * float32, int32. 2D or higher. Has format [ND, NHWC, FRACTAL_NZ]. Has the same shape length as "x1" and "x2". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchMatmul. */ REG_OP(BatchMatMul) @@ -148,7 +157,7 @@ REG_OP(MeanCCE) .ATTR(value1, ListInt, {}) .ATTR(mode, Int, 3) // 0:max pooling or 1:avg pooling .ATTR(pad_mode, Int, 0) - .ATTR(global_pooling, Bool, true) + .ATTR(global_pooling, Bool, true) // tensorflow have no attr, set default value .ATTR(window, ListInt, {1,1}) // kernel size .ATTR(pad, ListInt, {0,0,0,0}) // pad size .ATTR(stride, ListInt, {1,1}) // stride size @@ -194,6 +203,8 @@ REG_OP(MatMulCCE) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator L2Loss. 
*/ REG_OP(L2Loss) .INPUT(x, TensorType::FloatingDataType()) @@ -204,11 +215,15 @@ REG_OP(L2Loss) *@brief: Returns a batched diagonal tensor with a given batched diagonal values. *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +*x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiag. */ REG_OP(MatrixDiag) .INPUT(x, TensorType::BasicType()) @@ -226,6 +241,8 @@ REG_OP(MatrixDiag) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiag. */ REG_OP(MatrixDiagD) .INPUT(x, TensorType::BasicType()) @@ -237,11 +254,15 @@ REG_OP(MatrixDiagD) *@brief: Returns the batched diagonal part of a batched tensor. *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. +*x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiagPart. */ REG_OP(MatrixDiagPart) .INPUT(x, TensorType::BasicType()) @@ -259,6 +280,8 @@ REG_OP(MatrixDiagPart) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiagPart. */ REG_OP(MatrixDiagPartD) .INPUT(x, TensorType::BasicType()) @@ -271,12 +294,16 @@ REG_OP(MatrixDiagPartD) *@par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. 
+*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. *@li diagonal: A Tensor of the same type as "x". *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixSetDiag. */ REG_OP(MatrixSetDiag) .INPUT(x, TensorType::BasicType()) @@ -296,6 +323,8 @@ REG_OP(MatrixSetDiag) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixSetDiag. */ REG_OP(MatrixSetDiagD) .INPUT(x, TensorType::BasicType()) @@ -309,22 +338,26 @@ REG_OP(MatrixSetDiagD) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor. \n - -*Must be one of the following types: float16, float32, int8, uint8, bool -*@li indices: An ND Tensor. \n - -*Must be one of the following types: int32 -*@li updates: An ND Tensor. \n - -*Must be one of the following types: float16, float32, int8, uint8, bool +*@li var: An ND Tensor. +*Must be one of the following types: float16, float32, int8, uint8, double, + * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, + * uint64 +*@li indices: An ND Tensor. +*Must be one of the following types: int32, int64 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float32, int8, uint8, double, + * int64, complex64, qint8, quint8, qint32, uint16, complex128, half, uint32, + * uint64 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". If "True", + * the operation will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdUpdate. 
*/ REG_OP(ScatterNdUpdate) .INPUT(var, TensorType::BasicType()) @@ -352,6 +385,8 @@ REG_OP(ScatterNdUpdate) *@par Outputs: *y: A Tensor. Has the same type and format as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TensorScatterUpdate. */ REG_OP(TensorScatterUpdate) .INPUT(x, TensorType::BasicType()) @@ -365,22 +400,25 @@ REG_OP(TensorScatterUpdate) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor. \n +*@li var: An ND Tensor. *Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor of type int32. +*@li indices: An ND Tensor of type int32 or int64. -*@li updates: An ND Tensor. \n +*@li updates: A Tensor. format:NCHW, NHWC. *Must be one of the following types: float16, float32, int32, int8, uint8 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". If "True", the operation + * will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterAdd. */ REG_OP(ScatterAdd) .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -395,23 +433,23 @@ REG_OP(ScatterAdd) *@par Inputs: * Three inputs, including: -*@li var: An NCHW, NHWC, or ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An NCHW, NHWC, or ND Tensor. \n +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. *Must be one of the following types: int32 -*@li updates: An NCHW, NHWC, or ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 +*@li updates: An ND Tensor. 
+*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -*@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. -*@li isRef: An optional bool. Defaults to "True" +*@li use_locking: An optional bool. Defaults to "False". If "True", + * the operation will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterDiv. */ REG_OP(ScatterDiv) .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -426,22 +464,21 @@ REG_OP(ScatterDiv) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor. \n - +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. *Must be one of the following types: int32 -*@li updates: An ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 - +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". If "True", + * the operation will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdAdd. */ REG_OP(ScatterNdAdd) .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -469,6 +506,8 @@ REG_OP(ScatterNdAdd) *@par Outputs: *y: A Tensor. Has the same type and format as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TensorScatterAdd. 
*/ REG_OP(TensorScatterAdd) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -482,22 +521,22 @@ REG_OP(TensorScatterAdd) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor. \n - -*Must be one of the following types: int32 -*@li updates: An ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. +*Must be one of the following types: int32, int64 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". If "True", + * the operation will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNdSub. */ REG_OP(ScatterNdSub) .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -525,6 +564,8 @@ REG_OP(ScatterNdSub) *@par Outputs: *y: A Tensor. Has the same type and format as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TensorScatterSub. */ REG_OP(TensorScatterSub) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -538,22 +579,21 @@ REG_OP(TensorScatterSub) *@par Inputs: * Three inputs, including: -*@li var: An ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An ND Tensor. \n - -*Must be one of the following types: int32 -*@li updates: An ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 - +*@li var: An ND Tensor. 
+*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. +*Must be one of the following types: int32, int64 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". If "True", + * the operation will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterSub. */ REG_OP(ScatterSub) .INPUT(var, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -574,6 +614,8 @@ REG_OP(ScatterSub) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DiagPart. */ REG_OP(DiagPartD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -584,12 +626,15 @@ REG_OP(DiagPartD) /** *@brief: Returns the batched diagonal part of a batched tensor. -*@par Inputs:\n -*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. +*@par Inputs: +*x: A Tensor. Must be one of the following types: +* float16, float32, int32, int64, double, complex64, complex128. *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DiagPart. */ REG_OP(DiagPart) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_DOUBLE, @@ -611,11 +656,14 @@ REG_OP(DiagPart) *@par Attributes: *@li num_output: Reserved. *@li transpose: A bool, specifying whether to transpose, either "true" or "false". Defaults to "false". -*@li axis: Reserved. +*@li axis: Optional. An int. 1 or 2. *@li offset_x: Reserved. *@par Outputs: -*y: The result tensor of type float16, int8, float32. 
+*y: The result tensor of type float16, int32, float32. + +*@par Third-party framework compatibility +* Compatible with the Caffe operator InnerProduct. *@par Quantization supported or not * Yes @@ -637,12 +685,16 @@ REG_OP(FullyConnection) *@par Inputs: * Three inputs, including: -*@li labels: A Tensor. Must be one of the following types: float16, float32, int32, int8. -*@li predictions: A Tensor. Must be one of the following types: float16, float32, int32, int8. -*@li weights: A Tensor. Must be one of the following types: float16, float32, int32, int8. +*@li labels: A Tensor. Must be one of the following types: float16, float32, +* int32, int8, uint8. +*@li predictions: A Tensor. Must be one of the following types: float16, +* float32, int32, int8, uint8. +*@li weights: A Tensor. Must be one of the following types: float16, float32, +* int32, int8, uint8. *@par Attributes: -*@li num_classes: An integer for the shape of the output matrix. No default value. +*@li num_classes: An integer for the shape of the output matrix. +* No default value. *@li dtype: Data type of the confusion matrix. No default value. *@par Outputs: @@ -650,10 +702,13 @@ REG_OP(FullyConnection) *@attention Constraints: *@li "weights", "labels", and "predictions" are 1D tensors. -*@li The output is with shape (num_classes, num_classes), where, 1 <= num_classes <= 4096. +*@li The output is with shape (num_classes, num_classes), +* where, 1 <= num_classes <= 4096. *@see Region() +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ConfusionMatrix. */ REG_OP(ConfusionMatrix) .INPUT(labels, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16, DT_INT8, DT_UINT8})) @@ -669,22 +724,23 @@ REG_OP(ConfusionMatrix) *@par Inputs: * Three inputs, including: -*@li var: An NCHW, NHWC, or ND Tensor. \n - -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An NCHW, NHWC, or ND Tensor. \n - +*@li var: An ND Tensor. 
+*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. *Must be one of the following types: int32 -*@li updates: An NCHW, NHWC, or ND Tensor. \n +*@li updates: An ND Tensor. -*Must be one of the following types: float16, float32, int32, int8, uint8 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". If "True", the operation + * will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterMul. */ REG_OP(ScatterMul) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_INT8,DT_UINT8})) @@ -695,26 +751,29 @@ REG_OP(ScatterMul) .OP_END_FACTORY_REG(ScatterMul) /** -*@brief Reduces sparse updates into a variable reference using the "min" operation. +*@brief Reduces sparse updates into a variable reference using + * the "min" operation. *@par Inputs: * Three inputs, including: -*@li var: An NCHW, NHWC, or ND Tensor. \n - -*Must be one of the following types: float16, float32, int32 -*@li indices: An NCHW, NHWC, or ND Tensor. \n +*@li var: An ND Tensor. +*Must be one of the following types: float16, float, int32 +*@li indices: An ND Tensor. *Must be one of the following types: int32 -*@li updates: An NCHW, NHWC, or ND Tensor. \n -*Must be one of the following types: float16, float32, int32 +*@li updates: An ND Tensor. +*Must be one of the following types: float16, float, int32 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". If "True", the operation + * will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". 
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterMin. */ REG_OP(ScatterMin) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) @@ -729,22 +788,25 @@ REG_OP(ScatterMin) *@par Inputs: * Three inputs, including: -*@li var: An NCHW, NHWC, or ND Tensor. \n +*@li var: An ND Tensor. -*Must be one of the following types: float16, float32, int32 -*@li indices: An NCHW, NHWC, or ND Tensor. \n +*Must be one of the following types: float16, float, int32 +*@li indices: An NCHW, NHWC, or ND Tensor. *Must be one of the following types: int32 -*@li updates: An NCHW, NHWC, or ND Tensor. \n +*@li updates: An NCHW, NHWC, or ND Tensor. -*Must be one of the following types: float16, float32, int32 +*Must be one of the following types: float16, float, int32 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". + * If "True", the operation will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterMax. */ REG_OP(ScatterMax) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32})) @@ -759,22 +821,25 @@ REG_OP(ScatterMax) *@par Inputs: * Three inputs, including: -*@li var: An NCHW, NHWC, or ND Tensor. \n +*@li var: An ND Tensor. -*Must be one of the following types: float16, float32, int32, int8, uint8 -*@li indices: An NCHW, NHWC, or ND Tensor. \n +*Must be one of the following types: float16, float, int32, int8, uint8 +*@li indices: An ND Tensor. *Must be one of the following types: int32 -*@li updates: An NCHW, NHWC, or ND Tensor. \n +*@li updates: An ND Tensor. -*Must be one of the following types: float16, float32, int32, int8, uint8 +*Must be one of the following types: float16, float, int32, int8, uint8 *@par Attributes: -*use_locking: An optional bool. Defaults to "False". 
If "True", the operation will be protected by a lock. +*use_locking: An optional bool. Defaults to "False". If "True", + * the operation will be protected by a lock. *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterUpdate. */ REG_OP(ScatterUpdate) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) @@ -802,6 +867,8 @@ REG_OP(ScatterUpdate) *@par Outputs: *diagonal: The extracted diagonal(s). +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiagPartV2. */ REG_OP(MatrixDiagPartV2) .INPUT(input, TensorType::BasicType()) @@ -828,6 +895,8 @@ REG_OP(MatrixDiagPartV2) *@par Outputs: *output: Rank `r+1`, with `output.shape = input.shape`. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixSetDiagV2. */ REG_OP(MatrixSetDiagV2) .INPUT(input, TensorType::BasicType()) @@ -864,6 +933,8 @@ REG_OP(MatrixSetDiagV2) *@par Outputs: *output: Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MatrixDiagV2. */ REG_OP(MatrixDiagV2) .INPUT(diagonal, TensorType::BasicType()) diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h index 0a1337c0..b89287e9 100644 --- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h @@ -91,6 +91,9 @@ REG_OP(FusedBatchNorm) * @li dx: A Tensor. Must be one of the following types: float32. * @li bn_scale: A Tensor. Must be one of the following types: float32. * @li bn_bias: A Tensor. Must be one of the following types: float32. + +*@par Third-party framework compatibility +* Compatible with the L2 scenario of PyTorch operator Normalize. 
*/ REG_OP(FusedBatchNormGrad) @@ -118,9 +121,11 @@ REG_OP(FusedBatchNormGrad) *@li axis: A required attribute of type list, specifying the axis for normalization. *@li eps: An optional attribute of type float, specifying the lower limit of normalization. Defaults to "1e-4". -*@par Outputs: \n +*@par Outputs: *y: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue for normalization. +*@par Third-party framework compatibility +* Compatible with the L2 scenario of PyTorch operator Normalize. */ REG_OP(L2Normalize) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -134,17 +139,24 @@ REG_OP(L2Normalize) *@par Inputs: * Three inputs, including: \n -*@li x: A multi-dimensional Tensor of type float16 or float32, specifying the eigenvalue of forward inputs. -*@li y: A multi-dimensional Tensor of type float16 or float32, specifying the normalization result of the forward output. -*@li dy: A multi-dimensional Tensor of type float16 or float32, specifying the reverse input gradient. +*@li x: A multi-dimensional Tensor of type float16 or float32, specifying +* the eigenvalue of forward inputs. +*@li y: A multi-dimensional Tensor of type float16 or float32, specifying +* the normalization result of the forward output. +*@li dy: A multi-dimensional Tensor of type float16 or float32, specifying +* the reverse input gradient. *@par Attributes: -*@li axis: A required attribute of type int, specifying the axis to be normalized. -*@li eps: An optional attribute of type float, specifying the lower limit of normalization. Defaults to "1e-4". +*@li axis: A required attribute of type int, specifying the axis to be +* normalized. +*@li eps: An optional attribute of type float, specifying the lower limit of +* normalization. Defaults to "1e-4". *@par Outputs: *dx: Reverse gradient of eigenvalue "x". Has the same dimensions as "x". +*@par Third-party framework compatibility +* Compatible with the L2 scenario of PyTorch operator NormalizeGrad. 
*/ REG_OP(L2NormalizeGrad) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -183,6 +195,9 @@ REG_OP(L2NormalizeGrad) *@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator fused_batch_norm. +*@li Compatible with the TensorFlow operator fused_batch_norm_v2. */ REG_OP(BatchNorm) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -228,6 +243,8 @@ REG_OP(BatchNorm) *@li If the operation is used for inference, then output "reserve_space_1" has the same value as "mean" and output "reserve_space_2" has the same value as "variance". *@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator fused_batch_norm_v2. */ REG_OP(BatchNormExt2) .INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -272,6 +289,8 @@ REG_OP(BatchNormExt2) * The preceding layer of this operator must be operator BatchNorm. *@see BatchNorm +*@par Third-party framework compatibility +* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNormGrad. */ REG_OP(BatchNormGrad) .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -316,6 +335,8 @@ REG_OP(BatchNormGrad) * The preceding layer of this operator must be BatchNormExt2. *@see BatchNormExt2 +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator FusedBatchNormGradV2. */ REG_OP(BatchNormGradExt2) .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -342,10 +363,10 @@ REG_OP(BatchNormGradExt2) *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. 
*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. *@li momentum: An optional string, input x's Scale factor -*@li scale: no use -*@li offset: no use +*@li scale: An optional tensor of type float16 or float32, no use +*@li offset: An optional tensor of type float16 or float32, no use *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". *@li mode: An optional input, not use *@par Outputs:\n @@ -372,7 +393,7 @@ REG_OP(BNInference) *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference. *@li momentum: An optional float, input x's Scale factor *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". *@li mode: An optional inpout, not use *@par Outputs: @@ -383,6 +404,8 @@ REG_OP(BnHost) .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) .INPUT(momentum, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(scale, TensorType({DT_FLOAT16,DT_FLOAT})) + .OPTIONAL_INPUT(offset, TensorType({DT_FLOAT16,DT_FLOAT})) .ATTR(epsilon, Float, 0.00001) .ATTR(mode, Int, 1) .ATTR(use_global_stats, Bool, true) @@ -398,10 +421,10 @@ REG_OP(BnHost) *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference. *@li variance: A Tensor of type float32 or float16 . 
Must be 1D if input "x" Specifies the variance used for inference. *@li momentum: An optional float, input x's Scale factor -*@li scale: no use -*@li offset: no use +*@li scale: An optional tensor of type float16 or float32, no use +*@li offset: An optional tensor of type float16 or float32, no use *@par Attributes: -*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001". *@li use_global_stats: mean inference mode , only can be "True". *@li mode: An optional inpout, not use *@par Outputs:\n diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 85062248..f904f191 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -63,6 +63,10 @@ namespace ge { * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * * stride_h + 32 * filter_h) * ceil(Wi, 16) �?l1_size and Hf*Wf �?l0b_size/512.\n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. +* @li Compatible with the Caffe operator DepthwiseConv2DBackpropFilter. */ REG_OP(DepthwiseConv2DBackpropFilter) .INPUT(input, TensorType({float16})) @@ -116,6 +120,10 @@ REG_OP(DepthwiseConv2DBackpropFilter) * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * * stride_h + 32 * filter_h) * ceil(Wi, 16) �?l1_size and Hf*Wf �?l0b_size/512.\n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. +* @li Compatible with the Caffe operator DepthwiseConv2DBackpropFilter. 
*/ REG_OP(DepthwiseConv2DBackpropFilterD) .INPUT(input, TensorType({float16})) @@ -172,6 +180,10 @@ REG_OP(DepthwiseConv2DBackpropFilterD) * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling: max_h_in_l1 �?C0, where max_h_in_l1 = (l1_size - Hf * * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropInput. +* @li Compatible with the Caffe operator DepthwiseConv2DBackpropInput. */ REG_OP(DepthwiseConv2DBackpropInput) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) @@ -225,6 +237,10 @@ REG_OP(DepthwiseConv2DBackpropInput) * where C is the same as that of the feature map and C0 is 16.\n * Limited by Tiling: max_h_in_l1 �?C0, where max_h_in_l1 = (l1_size - Hf * * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropInput. +* @li Compatible with the Caffe operator DepthwiseConv2DBackpropInput. */ REG_OP(DepthwiseConv2DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16})) @@ -245,8 +261,8 @@ REG_OP(DepthwiseConv2DBackpropInputD) *Two required inputs and two optional inputs, including: \n * @li x: A 4D tensor of type float16, with shape [N, C, H, W] or [N, H, W, C] * @li filter: A 4D tensor of type float16, with shape [H, W, C, K] -* @li bias: An optional tensor of type int8 -* @li offset_w: An optional float16, used for quantized inference +* @li bias: An optional tensor of type float16 or int32 +* @li offset_w: An optional float16 or int8, used for quantized inference * @par Attributes: * @li strides: A required list or tuple. The stride of the sliding window for @@ -261,8 +277,9 @@ REG_OP(DepthwiseConv2DBackpropInputD) * @li pads: A required list or tuple. Padding added to each dimension of the * input. * @li data_format: An optional string. Input data format, either "NHWC" or -* "NCHW". -* @li offset_a: An optional int. 
Input offset, used for quantized inference. +* "NCHW". Defaults to "NHWC". +* @li offset_x: An optional int. Input offset, used for quantized inference. +* Defaults to 0. * @par Outputs: * y: 4D tensor of type float16, with shape [N, C, H, W] or [N, H, W, C] @@ -279,6 +296,10 @@ REG_OP(DepthwiseConv2DBackpropInputD) * @par Quantization supported or not * Yes + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator DepthwiseConv2D. +* @li Compatible with the Caffe operator DepthwiseConv2D. */ REG_OP(DepthwiseConv2D) .INPUT(x, TensorType({DT_FLOAT16, DT_INT8})) @@ -347,13 +368,16 @@ REG_OP(Conv2DBackpropInputCCE) * For NCHW data format, the feature dimension is the third-to-last. *@par Inputs: -*x: A Tensor of type TensorType::NumberType(). +*x: A Tensor of type NumberType. *@par Attributes: *data_format: Data format. Defaults to "NHWC". *@par Outputs: *y: A Tensor.Has the same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BiasAddGrad. */ REG_OP(BiasAddGrad) .INPUT(x, TensorType::NumberType()) @@ -373,7 +397,7 @@ REG_OP(BiasAddGrad) * @li out_backprop: A Tensor. Must have the same type as filter. 4-D with shape [batch, out_height, out_width, out_channels] * or [batch, out_channels, out_height, out_width]. Gradients with respect to the output of the convolution. *@par Attributes: - * Three attributes: + * Five attributes: * @li strides: A tuple/list of 2 integers. The stride of the sliding window for H/W dimension. * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads on feature map * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension of input, now only support [1,1,1,1] @@ -381,6 +405,8 @@ REG_OP(BiasAddGrad) * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. *@par Outputs: * y: A Tensor. 
Has the same type as filter,and has same format as input_size +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv2d_backprop_input */ REG_OP(Conv2DBackpropInput) .INPUT(input_size, TensorType({DT_INT32})) @@ -404,7 +430,7 @@ REG_OP(Conv2DBackpropInput) * @li out_backprop: A Tensor. Must have the same type as filter. 4-D with shape [batch, out_height, out_width, out_channels] * or [batch, out_channels, out_height, out_width]. Gradients with respect to the output of the convolution. *@par Attributes: - * Four attributes: + * Six attributes: * @li input_size A Tensor of type int32. An integer vector representing the shape of input, * where input is a 4-D tensor [batch, height, width, channels] or [batch, channels, height, width]. * @li strides: A tuple/list of 2 integers. The stride of the sliding window for H/W dimension. @@ -414,6 +440,8 @@ REG_OP(Conv2DBackpropInput) * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. *@par Outputs: * y: A Tensor. Has the same type as filter,4-D tensor [batch, height, width, channels] or [batch, channels, height, width]. +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv2d_backprop_input */ REG_OP(Conv2DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8})) @@ -440,7 +468,7 @@ REG_OP(Conv2DBackpropInputD) * or [out_channels, filter_height, filter_width, in_channels], * or [out_channels, in_channel, filter_height, filter_width]. * Two optional inputs: - * @li bias: An optional tensor of type int8 + * @li bias: An optional tensor of type float16 * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved.\n *@par Attributes: * Six attributes: @@ -484,7 +512,7 @@ REG_OP(Deconvolution) * @li out_backprop: A Tensor. Must have the same type as x. 4-D with shape [batch, out_height, out_width, out_channels] * or [batch, out_channels, out_height, out_width]. 
Gradients with respect to the output of the convolution. *@par Attributes: - * Three attributes: + * Five attributes: * @li strides: A tuple/list of 2 integers. The stride of the sliding window for H/W dimension. * @li pads: A tuple/list of 4 integers, [top, bottom, left, right] pads on feature map. * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension of input, now only support [1,1,1,1]. @@ -492,6 +520,8 @@ REG_OP(Deconvolution) * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. *@par Outputs: * y: A Tensor. Has the same type as x +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv2d_backprop_filter */ REG_OP(Conv2DBackpropFilter) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) @@ -514,7 +544,7 @@ REG_OP(Conv2DBackpropFilter) * @li out_backprop: A Tensor. Must have the same type as x. 4-D with shape [batch, out_height, out_width, out_channels] * or [batch, out_channels, out_height, out_width]. Gradients with respect to the output of the convolution. *@par Attributes: - * Four attributes: + * Six attributes: * @li filter_size: A Tensor of type integers. An integer vector representing the tensor shape of filter, * where filter is a 4-D tensor [filter_height, filter_width, in_channels, out_channels] * or [out_channels, filter_height, filter_width, in_channels] or [out_channels, in_channel, filter_height, filter_width]. @@ -525,6 +555,8 @@ REG_OP(Conv2DBackpropFilter) * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data. *@par Outputs: * y: A Tensor. Has the same type as x +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv2d_backprop_filter */ REG_OP(Conv2DBackpropFilterD) .INPUT(x, TensorType({DT_FLOAT16})) @@ -544,8 +576,8 @@ REG_OP(Conv2DBackpropFilterD) * @li x: A 4D tensor of input images. 
* @li filter: A 4D tensor of filters. * @li bias: An optional 1D tensor. -* @li offset_w: An optional 1D tensor for quantized convolution. Reserved.\n -* \n +* @li offset_w: An optional 1D tensor for quantized convolution. Reserved. +* * The input and output tensor attributes are listed as follows: * @verbatim Tensor | x | filter | bias | offset_w | y @@ -584,7 +616,7 @@ REG_OP(Conv2DBackpropFilterD) * @li y: A 4D Tensor of output images. *@attention -* @li The parameter scope is listed as follows:\n +* @li The parameter scope is listed as follows: * @verbatim Name | Field | Scope ------------------|--------------|---------- @@ -621,6 +653,10 @@ REG_OP(Conv2DBackpropFilterD) *@par Quantization supported or not * Yes + +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator "conv2d". +*@li Compatible with the Caffe operator 2D "Convolution". */ REG_OP(Conv2D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8})) @@ -670,6 +706,9 @@ REG_OP(Conv2DCompress) *@attention Constraints:\n *The image size after padding is greater than the filter size.\n +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator conv3d. +*@li Compatible with the Caffe operator Convolution. */ REG_OP(Conv3D) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) @@ -699,6 +738,8 @@ REG_OP(Conv3D) * @li data_format: An optional string from: "NDHWC", "NCHWD". Defaults to "NDHWC". Specify the data format of the input and output data. *@par Outputs: * y: A Tensor. Has the same type as filter,and has same format as input_size +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv3d_backprop_input */ REG_OP(Conv3DBackpropInput) .INPUT(input_size, TensorType({DT_INT32, DT_INT64})) @@ -726,6 +767,8 @@ REG_OP(Conv3DBackpropInput) * @li data_format: An optional string from: "NDHWC", "NCHWD". Defaults to "NDHWC". Specify the data format of the input and output data. *@par Outputs: * y: A Tensor. 
Has the same type as filter +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv3d_backprop_input */ REG_OP(Conv3DBackpropInputD) .INPUT(filter, TensorType({DT_FLOAT16})) @@ -754,5 +797,75 @@ REG_OP(LSTM) .ATTR(num_output, Int, 0) .ATTR(expose_hidden, Bool, false) .OP_END_FACTORY_REG(LSTM) + +/** +*@brief Computes the gradients of convolution3D with respect to the filter +*@par Inputs: + * Three inputs: + * @li x: A Tensor. Must be one of the following types: float16 + * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] or [batch, in_depth, in_channels, in_height, in_width]. + * @li filter_size: A Tensor of type int32. An integer vector representing the tensor shape of filter, + * where filter is a 5-D tensor [filter_depth, filter_height, filter_width, in_channels, out_channels] + * or [out_channels, filter_depth, filter_height, filter_width, in_channels] or [out_channels, filter_depth, in_channel, filter_height, filter_width]. + * @li out_backprop: A Tensor. Must have the same type as x. 5-D with shape [batch, out_depth, out_height, out_width, out_channels] + * or [batch, out_depth, out_channels, out_height, out_width]. Gradients with respect to the output of the convolution. +*@par Attributes: + * Five attributes: + * @li strides: A tuple/list of 3 integers. The stride of the sliding window for D/H/W dimension. + * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] pads on feature map. + * @li dilations: A tuple/list of 5 integers, The dilation factor for each dimension of input, now only support [1,1,1,1,1]. + * @li groups: Number of blocked connections from input channels to output channels. + * @li data_format: An optional string from: "NDHWC", "NDCHW". Defaults to "NDHWC". Specify the data format of the input and output data. +*@par Outputs: + * y: A Tensor.
Has the same type as x +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv3d_backprop_filter +*/ +REG_OP(Conv3DBackpropFilter) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .INPUT(filter_size, TensorType({DT_INT32})) + .INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(Conv3DBackpropFilter) + +/** +*@brief Computes the gradients of convolution with respect to the filter. +*@par Inputs: + * Two inputs: + * @li x: A Tensor. Type is float16. + * 5-D with shape [batch, in_depth, in_height, in_width, in_channels] or [batch, in_depth, in_channels, in_height, in_width]. + * @li out_backprop: A Tensor. Must have the same type as x. 5-D with shape [batch, out_depth, out_height, out_width, out_channels] + * or [batch, out_depth, out_channels, out_height, out_width]. Gradients with respect to the output of the convolution. +*@par Attributes: + * Six attributes: + * @li filter_size: A Tensor of type integers. An integer vector representing the tensor shape of filter, + * where filter is a 5-D tensor [filter_depth, filter_height, filter_width, in_channels, out_channels] + * or [out_channels, filter_depth, filter_height, filter_width, in_channels] or [out_channels, filter_depth, in_channel, filter_height, filter_width]. + * @li strides: A tuple/list of 3 integers. The stride of the sliding window for D/H/W dimension. + * @li pads: A tuple/list of 6 integers, [front, back, top, bottom, left, right] pads on feature map. + * @li dilations: A tuple/list of 5 integers, The dilation factor for each dimension of input, now only support [1,1,1,1,1]. + * @li groups: Number of blocked connections from input channels to output channels.
+ * @li data_format: An optional string from: "NDHWC", "NDCHW". Defaults to "NDHWC". Specify the data format of the input and output data. +*@par Outputs: + * y: A Tensor. Has the same type as x +*@par Third-party framework compatibility + * Compatible with Tensorflow's conv3d_backprop_filter +*/ +REG_OP(Conv3DBackpropFilterD) + .INPUT(x, TensorType({DT_FLOAT16})) + .INPUT(out_backprop, TensorType({DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(filter_size, ListInt) + .REQUIRED_ATTR(strides, ListInt) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(dilations, ListInt, {1, 1, 1, 1, 1}) + .ATTR(groups, Int, 1) + .ATTR(data_format, String, "NDHWC") + .OP_END_FACTORY_REG(Conv3DBackpropFilterD) } // namespace ge #endif // GE_OP_NN_CALCULATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 90b49720..7d6007d9 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -23,21 +23,33 @@ namespace ge { /** -*@brief Generates bounding boxes based on "rois" and "deltas". It is a customized FasterRcnn operator. +*@brief Generates bounding boxes based on "rois" and "deltas". +* It is a customized FasterRcnn operator. *@par Inputs: * Two inputs, including: \n -*@li rois: Region of interests (ROIs) generated by the region proposal network (RPN). A 2D Tensor of type float 32 with shape (N, 4). "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1", "y0", and "y1". -*@li deltas: Absolute variation between the ROIs generated by the RPN and ground truth boxes. A 2D Tensor of type float32 with shape (N, 4). "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh". +*@li rois: Region of interests (ROIs) generated by the region proposal +* network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4). +* "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1", +* "y0", and "y1". 
+*@li deltas: Absolute variation between the ROIs generated by the RPN and +* ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4). +* "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh". *@par Attributes: -*@li means: An index of type int. Defaults to [0,0,0,0]. "deltas" = "deltas" x "stds" + "means". -*@li stds: An index of type int. Defaults to [0,0,0,0]. "deltas" = "deltas" x "stds" + "means". -*@li max_shape: Shape [h, w], specifying the size of the image transferred to the network. Used to ensure that the bbox shape after conversion does not exceed "max_shape". -*@li wh_ratio_clip: Defaults to "16/1000". The values of "dw" and "dh" fall within (-wh_ratio_clip, wh_ratio_clip). +*@li means: An index of type int. Defaults to [0,0,0,0]. +* "deltas" = "deltas" x "stds" + "means". +*@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0]. +* "deltas" = "deltas" x "stds" + "means". +*@li max_shape: Shape [h, w], specifying the size of the image transferred to +* the network. Used to ensure that the bbox shape after conversion does not +* exceed "max_shape". +*@li wh_ratio_clip: Defaults to "16/1000". The values of "dw" and "dh" fall +* within (-wh_ratio_clip, wh_ratio_clip). *@par Outputs: -*bboxes: Bboxes generated based on "rois" and "deltas". Have the same format and type as "rois". +*bboxes: Bboxes generated based on "rois" and "deltas". Have the same format +* and type as "rois". */ REG_OP(BoundingBoxDecode) .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -50,16 +62,23 @@ REG_OP(BoundingBoxDecode) .OP_END_FACTORY_REG(BoundingBoxDecode) /** -*@brief Computes the coordinate variations between bboxes and ground truth boxes. It is a customized FasterRcnn operator. +*@brief Computes the coordinate variations between bboxes and ground truth +* boxes. It is a customized FasterRcnn operator. *@par Inputs: * Two inputs, including: \n -*@li anchor_box: Anchor boxes. A 2D Tensor of float32 with shape (N, 4). 
"N" indicates the number of bounding boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". -*@li ground_truth_box: Ground truth boxes. A 2D Tensor of float32 with shape (N, 4). "N" indicates the number of bounding boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". +*@li anchor_box: Anchor boxes. A 2D Tensor of float32 with shape (N, 4). +* "N" indicates the number of bounding boxes, and the value "4" refers to +* "x0", "x1", "y0", and "y1". +*@li ground_truth_box: Ground truth boxes. A 2D Tensor of float32 with +* shape (N, 4). "N" indicates the number of bounding boxes, and the value "4" +* refers to "x0", "x1", "y0", and "y1". *@par Attributes: -*@li means: An index of type int. Defaults to [0,0,0,0]. "deltas" = "deltas" x "stds" + "means". -*@li stds: An index of type int. Defaults to [0,0,0,0]. "deltas" = "deltas" x "stds" + "means". +*@li means: An index of type int. Defaults to [0,0,0,0]. +* "deltas" = "deltas" x "stds" + "means". +*@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0]. +* "deltas" = "deltas" x "stds" + "means". *@par Outputs: *delats: A 2D Tensor of type float32 with shape (N, 4), specifying the variations between all anchor boxes and ground truth boxes. @@ -73,18 +92,24 @@ REG_OP(BoundingBoxEncode) .OP_END_FACTORY_REG(BoundingBoxEncode) /** -*@brief Judges whether the bounding box is valid. It is a customized FasterRcnn operator. +*@brief Judges whether the bounding box is valid. It is a customized +* FasterRcnn operator. *@par Inputs: * Two inputs, including: \n -*@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4). "N" indicates the number of bounding boxes, the value "4" indicates "x0", "x1", "y0", and "y1". -*@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16 with shape (16,) +*@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4). +* "N" indicates the number of bounding boxes, the value "4" indicates "x0", +* "x1", "y0", and "y1". 
+*@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16 +* with shape (16,) *@par Outputs: -*valid_tensor: A bool with shape (N, 1), specifying whether an input anchor is in an image. "1" indicates valid, while "0" indicates invalid. +*valid_tensor: A bool with shape (N, 1), specifying whether an input anchor is +* in an image. "1" indicates valid, while "0" indicates invalid. *@attention Constraints: -* 16 "img_metas" are input. The first three numbers (height, width, ratio) are valid, specifying the valid boundary (heights x ratio, weights x ratio). +* 16 "img_metas" are input. The first three numbers (height, width, ratio) are +* valid, specifying the valid boundary (heights x ratio, weights x ratio). */ REG_OP(CheckValid) .INPUT(bbox_tensor, TensorType({DT_FLOAT16})) @@ -93,21 +118,28 @@ REG_OP(CheckValid) .OP_END_FACTORY_REG(CheckValid) /** -*@brief Computes the intersection over union (iou) or the intersection over foreground (iof) based on the ground-truth and predicted regions. +*@brief Computes the intersection over union (iou) or the intersection over +* foreground (iof) based on the ground-truth and predicted regions. *@par Inputs: * Two inputs, including: \n -*@li bboxes: Bounding boxes, a 2D Tensor of type float16 with shape (N, 4). "N" indicates the number of bounding boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". -*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 with shape (M, 4). "M" indicates the number of ground truth boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". +*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with +* shape (N, 4). "N" indicates the number of bounding boxes, and the value +* "4" refers to "x0", "x1", "y0", and "y1". +*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32 +* with shape (M, 4). "M" indicates the number of ground truth boxes, and +* the value "4" refers to "x0", "x1", "y0", and "y1". 
*@par Attributes: *mode: Computation mode, a character string with the value range of [iou, iof]. *@par Outputs: -*overlap: A 2D Tensor of type float16 with shape [M, N], specifying the IoU or IoF ratio. +*overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying +* the IoU or IoF ratio. *@attention Constraints: -* Only computation of float16 data is supported. To avoid overflow, the input length and width are scaled by 0.2 internally. +* Only computation of float16 data is supported. To avoid overflow, the input +* length and width are scaled by 0.2 internally. */ REG_OP(Iou) .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -152,18 +184,22 @@ REG_OP(ROIAlignGrad) *@par Inputs: * Three inputs, including: \n -*@li features: A 5HD Tensor of type float32. -*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". +*@li features: A 5HD Tensor of type float32 or float16. +*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, +* "x0", "x1", "y0", and "y1". *@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. *@par Attributes: *@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. *@li pooled_height: A required attribute of type int, specifying the H dimension. *@li pooled_width: A required attribute of type int, specifying the W dimension. -*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2". 
+*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", +* the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2". *@par Outputs: -*output: Outputs the feature sample of each ROI position. The format is 5HD. The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", "pooled_width", and "features", respectively. +* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. The axis N is the number of input ROIs. Axes H, W, and C are consistent +* with the values of "pooled_height", +* "pooled_width", and "features", respectively. */ REG_OP(ROIAlign) .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -204,6 +240,8 @@ REG_OP(ROIAlign) *@attention Constraints:\n * This operator applies only to SSD networks. *@see SSDDetectionOutput() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(PriorBox) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -252,6 +290,8 @@ REG_OP(ROIAlign) *@attention Constraints:\n * This operator applies only to SSD networks. *@see SSDDetectionOutput() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(PriorBoxD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -292,7 +332,7 @@ REG_OP(ROIAlign) * must be greater than 0. *@li group_size: A required int32, specifying the number of groups to encode * position-sensitive score maps, must be within the range (0, 128). -*@li spatial_scale: A required scaling factor for mapping the input +*@li spatial_scale: A required float32, scaling factor for mapping the input * coordinates to the ROI coordinates. 
*@par Outputs: @@ -327,16 +367,17 @@ REG_OP(PSROIPooling) *@li score_threshold: An required float32, specifying the threshold for box filtering. The value range is [0.0, 1.0]. *@li iou_threshold: An required float32, specifying the confidence threshold for box filtering, which is the output "obj" of operator Region. The value range is (0.0, 1.0). *@par Outputs: -*box: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence. -Proposal of actual output, with shape [batch, numBoxes,8], 8 means [x1, y1, x2, y2, score, label, batchID, NULL], the maximum value of numBoxes is 1024. +*@li box: An NCHW tensor of type float16 or float32, describing the information of each output box, including the coordinates, class, and confidence. +Proposal of actual output, with output shape [batch, numBoxes,8], 8 means [x1, y1, x2, y2, score, label, batchID, NULL], the maximum value of numBoxes is 1024. That is, take min (the maximum number of input boxes, 1024) -*actual_bbox_num: An NCHW tensor of type int32, specifying the number of output boxes. -With shape [bacth, num_classes], Actual number of bboxes output +*@li actual_bbox_num: An NCHW tensor of type int32 With shape [bacth, num_classes], specifying the number of output boxes. *@attention Constraints:\n *@li totalnum < max_rois_num * batch_rois. *@li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images. *@li "bbox_delta" must be with shape (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. 
*/ REG_OP(FSRDetectionOutput) .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -362,20 +403,21 @@ REG_OP(FSRDetectionOutput) *@li anchors: An ND tensor of type floa16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput. *@par Attributes: *@li num_classes: An optional int32, specifying the number of classes to be predicted. Defaults to "2". The value must be greater than 1 and lesser than 1025. -*@li share_location: An option bool, specify the shared location. Defaults to True -*@li background_label_id: An option int32, specify the background label id. Must be 0 -*@li iou_threshold: An option float32, specify the nms threshold -*@li top_k: An option int32, specify the topk value. Defaults to 200 -*@li eta: An option float32, specify the eta value. Defaults to 1 -*@li variance_encoded_in_target: An option bool, specify whether variance encoded in target or not. Defaults to False -*@li code_type: An option int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3 -*@li keep_top_k: An option int32, specify the topk value after nms. Defaults to -1 -*@li confidence_threshold: An option float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold +*@li share_location: An optional bool, specify the shared location. Defaults to True +*@li background_label_id: An optional int32, specify the background label id. Must be 0 +*@li iou_threshold: An optional float32, specify the nms threshold +*@li top_k: An optional int32, specify the topk value. Defaults to 200 +*@li eta: An optional float32, specify the eta value. Defaults to 1.0 +*@li variance_encoded_in_target: An optional bool, specify whether variance encoded in target or not. Defaults to False +*@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). 
The corner is 1, center_size is 2, corner_size is 3 +*@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1 +*@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold *@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output". *@par Outputs: -*out_boxnum: An NCHW tensor of type int32, specifying the number of output boxes. -*y: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence. -With shape [batch,keep_top_k,8], 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, yman, null) +*@li out_boxnum: An NCHW tensor of type int32, specifying the number of output boxes. +*@li y: An NCHW tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box, including the coordinates, +* class, and confidence. In output shape, 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax, null) +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(SSDDetectionOutput) .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -408,7 +450,7 @@ REG_OP(SSDDetectionOutput) *@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3". *@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". *@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". -*@li background: A bool. +*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. *@par Outputs: *@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2], where "ceil" indicates that a detected box is aligned upwards with the second parameter. 
Specifies the coordinates of a detected box. @@ -418,6 +460,8 @@ REG_OP(SSDDetectionOutput) *@attention Constraints: *@li This operator applies to YOLO v2 and v3 networks. *@li The succeeding layer of the Yolo operator must be operator Yolov3DetectionOutput. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(Yolo) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -440,7 +484,7 @@ REG_OP(Yolo) * Four inputs, including: *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput. \n Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo. -*@li imginfo: A float16, describing the image information including the required image height and width \n +*@li img_info: A float16 or float32, describing the image information including the required image height and width \n and the actual image height and width. * *@par Attributes: @@ -457,16 +501,18 @@ and the actual image height and width. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". * *@par Outputs: -*@li boxout: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence. -With shape [batch,6,post_nms_topn], 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. -*@li boxoutnum: An NCHW tensor of type int32, specifying the number of output boxes. -With shape [batch,8,1,1], means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 +*@li boxout: An NCHW tensor of type float16 or float32 with shape [batch,6,post_nms_topn]. describing the information of each output box, including the coordinates, class, +and confidence. 
In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li boxoutnum: An NCHW tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid, +the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 * *@attention Constraints:\n *@li This operator applies only to the YOLO v2 network. *@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator. * *@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV2DetectionOutput) .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -525,6 +571,8 @@ With shape [batch,8,1,1], means only the first one of the 8 numbers is valid, th *@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator. *@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV2DetectionOutputD) .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -554,7 +602,7 @@ REG_OP(YoloV2DetectionOutputD) *Ten inputs, including: *@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo. -*@li imginfo: A float16, describing the image information including the required image height and width \n +*@li img_info: A float16 or float32, describing the image information including the required image height and width \n and the actual image height and width. * *@par Attributes: @@ -573,16 +621,18 @@ and the actual image height and width. *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512". 
* *@par Outputs: -*@li boxout: An NCHW tensor of type float16, describing the information of each output box, including the coordinates, class, and confidence. -With shape [batch,6,post_nms_topn], 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. -*@li boxoutnum: An NCHW tensor of type int32, specifying the number of output boxes. -With shape [batch,8,1,1], means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 +*@li boxout: An NCHW tensor of type float16 or float32 with shape [batch,6,post_nms_topn], describing the information of each output box, including the coordinates, class, and confidence. +In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num. +*@li boxoutnum: An NCHW tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. +The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024 *@attention Constraints:\n *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. *@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(YoloV3DetectionOutput) .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -648,6 +698,8 @@ With shape [batch,8,1,1], means only the first one of the 8 numbers is valid, th *@li This operator applies only to the YOLO v3 network. *@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. *@see Yolo() +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. 
*/ REG_OP(YoloV3DetectionOutputD) .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -700,8 +752,11 @@ REG_OP(YoloV3DetectionOutputD) *@attention Constraints: * @li pyramid_height: pyramid_heigjt should be in range [0,7). +* Pooling paramter should statisfied with caffe pooling param(pad= 0","axis + m" must be less than or equal to "n" and the ith axis of "scale" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n * If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "scale" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). *@li If "bias" is not None, the constraints for "bias" is the same as that for "scale". +*@par Third-party framework compatibility +* Compatible with the Caffe operator Scale. */ REG_OP(Scale) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ @@ -630,16 +674,22 @@ REG_OP(Scale) *@li x: A Tensor. Must be 4-D shape, and only support the following types: float16, float32. *@par Attributes: -* depth_radius = (local_size + 1) / 2. Defaults to "5". +*@li depth_radius: An optional int32, specifying the half-width of the normalization window. Defaults to "5". +* under the caffe framework, if local_size is provided and is an odd number, +* depth_radius = (local_size - 1) / 2. local_size is the number of channels to sum over (for ACROSS_CHANNELS) +* or the side length of the square region to sum over (for WITHIN_CHANNEL). *@li bias: An optional float32. An offset, usually > 0 to avoid dividing by 0. * Defaults to "1". *@li alpha: An optional float32. A scaling factor, usually positive. * Defaults to "1". +*@li beta: An optional float32. An exponent. Defaults to "0.75" for the caffe framework, Defaults to "0.5" for others. *@li norm_region: An optional string. A mode option. "ACROSS_CHANNELS":0, "WITHIN_CHANNEL":1. Defaults to "ACROSS_CHANNELS". *@par Outputs: *y: A Tensor. Has the same data type and shape as "x". 
+*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator LRN. */ REG_OP(LRN) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -674,6 +724,8 @@ REG_OP(LRN) * @attention Constraints: * "x" and "y" must have the same shape and type as "grads". +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator LRNGrad. */ REG_OP(LRNGrad) .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -703,6 +755,8 @@ REG_OP(LRNGrad) *@par Attributes: *@li blank_label: An optional attribute. Defaults to 0. + *@par Third-party framework compatibility + * Compatible with TensorFlow RNNTLoss operator. */ REG_OP(RNNTLoss) .INPUT(acts, TensorType({DT_FLOAT})) @@ -762,6 +816,8 @@ ce_2" has the same value as "variance". *@li For Ascend 310, the result accuracy fails due to the square root \n instruction. +*@par Third-party framework compatibility +*@li Compatible with the PyTorch operator GroupNorm. */ REG_OP(GroupNorm) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 0e57334f..f167dbee 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -25,7 +25,7 @@ namespace ge { /** *@brief Performs pooling on the input. *@par Inputs: -*@li x: An NCHW tensor of type float16, float32. +*@li x: An NCHW tensor of type float16, float32, int8. *@par Attributes: *@li mode: An optional int32, specifying the pooling algorithm, either "1" (max pooling) or "0" (avg pooling). Defaults to "0". *@li global_pooling: An optional bool. Defaults to "false". @@ -47,11 +47,14 @@ namespace ge { *dilation[3]: An optional int32, specifying the right dilation. Defaults to "1". \n *@li ceil_mode: An optional int32, either "0" (ceil mode) or "1" (floor mode). Defaults to "0". *@par Outputs: -*y: An NCHW tensor of type float16, float32. +*y: An NCHW tensor of type float16, float32, int32. 
*@attention Constraints:\n *@li window[0] * window[1] < 256; *@li 1<=input_h<=4096,1<=input_w<=4096 *@li If input tensor N is a prime number, it should be less than 65535. +*@par Third-party framework compatibility +*@li Compatible with the Caffe operator Pooling. +*@li Compatible with the TensorFlow operator Pooling. */ REG_OP(Pooling) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_INT8})) @@ -69,7 +72,7 @@ REG_OP(Pooling) *@brief Performs average pooling on the input. *@par Inputs: -*x: A tensor of type float16. +*x: A tensor of type float16, float32, double. *@par Attributes: *@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, where N = C = 1, and H and W are positive integers within the range [1, 32768]. @@ -78,13 +81,16 @@ REG_OP(Pooling) *@li data_format: An optional string, specifying the data format of "ksize" and "strides", either "NCHW", "NC1HWC0", or "NHWC" (default). *@par Outputs: -*y: The average pooled output tensor. +*y: The average pooled output tensor. Has the same type and format as input "x". -*@attention Constraints:\n +*@attention Constraints: +*@li This operator applies only to a TensorFlow network. *@li Only single input and single output are supported. *@li Global pooling is supported. *@li "ksize_H" and "ksize_W" are positive integers within the range [1, 32768]. ksize_H * ksize_W < 256 *@li Due to instruction restrictions, the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AvgPool. */ REG_OP(AvgPool) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE})) @@ -117,6 +123,8 @@ REG_OP(AvgPool) *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID". 
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolV2. */ REG_OP(MaxPoolExt2) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT8, @@ -136,24 +144,32 @@ REG_OP(MaxPoolExt2) *@par Inputs: * One input: -*x: An NC1HWC0 Tensor of type float16. - +*x: An NC1HWC0 Tensor. Supported type:float16, float32, double, int8, int16, + * int32, int64, uint8, uint16, qint8 *@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +*@li ksize: A required list of int8, int16, int32, or int64 values, + * specifying the size of the window for each dimension of the input tensor. + * No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, + * specifying the stride of the sliding window for each dimension of + * the input tensor. No default value. *@li padding: A required string. No default value. -*@li data_format: An optional string. Defaults to "NC1HWC0". +*@li data_format: An optional string. Defaults to "NHWC". *@par Outputs: *y: A Tensor. Has the same type and format as input "x". *@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. -*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, + * ksize[1] * ksize[2] <= 255. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, + * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID". 
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPool. */ REG_OP(MaxPool) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32, DT_DOUBLE, DT_INT8, @@ -204,6 +220,9 @@ REG_OP(MaxPool3D) * @li Computing gradients of global pooling is not supported, which means * "ksize < x1". * @li "ksiez" is in the range [1, 255]. "strides" is in the range [1, 63] + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGrad. */ REG_OP(MaxPoolGrad) .INPUT(x1, TensorType::RealNumberType()) @@ -220,9 +239,10 @@ REG_OP(MaxPoolGrad) * @brief Computes second-order gradients of the maxpooling function. * @par Inputs: -* @li x1: Original forward input tensor of type float16 -* @li x2: Original forward output tensor of type float16 -* @li grad: Gradient tensor of type float16 +* @li x1: Original forward input tensor. Supported type:float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. +* @li x2: Has the same type and format as input "x1". +* @li grad:Has the same type and format as input "x1". * @par Attributes: * @li ksize: A required list or tuple, @@ -240,7 +260,10 @@ REG_OP(MaxPoolGrad) * @li "x1", "x2", "grads", and "y" must be 5D tensors. * @par Outputs: -* @li y: Result tensor of type float16 +* @li y: Has the same type and format as input "x1". + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator MaxPoolGradGrad. */ REG_OP(MaxPoolGradGrad) .INPUT(x1, TensorType::RealNumberType()) @@ -274,6 +297,9 @@ REG_OP(MaxPoolGradGrad) *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolV2. 
*/ REG_OP(MaxPoolV2) .INPUT(x, TensorType({DT_FLOAT16})) @@ -285,26 +311,35 @@ REG_OP(MaxPoolV2) .OP_END_FACTORY_REG(MaxPoolV2) /** -*@brief Performs max pooling on the input and outputs both max values and indices. +*@brief Performs max pooling on the input and outputs both max values and + * indices. *@par Inputs: * One input: -*x: An NC1HWC0 Tensor of type float16. - +*x: An NC1HWC0 Tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. *@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +*@li ksize: A required list of int8, int16, int32, or int64 values, + * specifying the size of the window for each dimension of the input tensor. + * No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, + * specifying the stride of the sliding window for each dimension of + * the input tensor. No default value. *@li padding: A required string. No default value. *@par Outputs: *y: A Tensor. Has the same type and format as input "x". - +*argmax: A Tensor. Has the same type and format as input "x". *@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. -*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, + * ksize[1] * ksize[2] <= 255. +*@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, + * strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID". 
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolWithArgmax. */ REG_OP(MaxPoolWithArgmax) .INPUT(x, TensorType::RealNumberType()) @@ -321,25 +356,34 @@ REG_OP(MaxPoolWithArgmax) *@par Inputs: * Three inputs, including: -*@li x: An NC1HWC0 tensor of type float16. -*@li grad: An NC1HWC0 tensor of type float16. -*@li argmx: An NC1HWC0 tensor of type uint16 or int64. +*@li x: An NC1HWC0 tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. +*@li grad: An NC1HWC0 tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. +*@li argmx: An NC1HWC0 tensor of type int32 or int64. *@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +*@li ksize: A required list of int8, int16, int32, or int64 values, + * specifying the size of the window for each dimension of the input tensor. + * No default value. +*@li strides: A required list of int8, int16, int32, or int64 values, + * specifying the stride of the sliding window for each dimension of + * the input tensor. No default value. *@li padding: A required string. No default value. *@par Outputs: *y: A Tensor. Has the same type and format as input "x". *@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. +*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, + * ksize[1] * ksize[2] <= 255. *@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 *@li "padding" is either "SAME" or "VALID". 
*@see max_pool_with_argmax +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator MaxPoolGradWithArgmax. */ REG_OP(MaxPoolGradWithArgmax) .INPUT(x, TensorType::RealNumberType()) @@ -355,15 +399,18 @@ REG_OP(MaxPoolGradWithArgmax) * @brief Computes second-order gradients of the maxpooling function. * @par Inputs: -* @li x: Original forward input tensor of type float16 -* @li grad: Gradient tensor of type float16 -* @li argmax: An tensor of type uint16 +* @li x: Original forward input tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. +* @li grad: Gradient tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64. +* @li argmax: An tensor of type int32 or int64. * @par Attributes: * @li ksize: A required list, specifying the size of the sliding window. * @li strides: A required list, specifying the stride of the sliding window. * @li padding: A required string, window sliding mode. Either SAME or VALID. * @par Outputs: -* @li y:Result tensor of type float16 +* @li y:Result tensor. Supported type: float, double, int32, + * uint8, int16, int8, int64, uint16, half, uint32, uint64 * @attention Constraints: * @li Only the cloud platform is supported. @@ -373,6 +420,9 @@ REG_OP(MaxPoolGradWithArgmax) * (shape_max_pool[2] * shape_max_pool[3] + 15) // 16 * 16, 1), * or (fmap_n, fmap_c1, kernel_h * kernel_w, * (shape_max_pool[2] * shape_max_pool[3] + 31) // 16, 16), else failed. + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator MaxPoolGradGradWithArgmax. */ REG_OP(MaxPoolGradGradWithArgmax) .INPUT(x, TensorType::RealNumberType()) @@ -402,6 +452,9 @@ REG_OP(MaxPoolGradGradWithArgmax) * @par Outputs: * @out_grad: A mutable tensor with the same shape and type as "orig_input". + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator AvgPoolGrad. 
*/ REG_OP(AvgPoolGrad) .INPUT(orig_input_shape, TensorType({DT_INT32})) @@ -479,9 +532,9 @@ REG_OP(MaxPoolGradWithArgmaxCCE) * one input, including: *@li x: A tensor of type float16 or float32. *@par Attributes: -*@li scale:scale factor of x -*@li stride_h:broadcast the axis of h -*@li stride_w:broadcast the axis of w +*@li scale: An optional float, scale factor of x. Defaults to "1.0". +*@li stride_h: An optional int32, broadcast the axis of h. Defaults to "2". +*@li stride_w: An optional int32, broadcast the axis of w. Defaults to "2". *@par Outputs: *y: A tensor of type float16 or float32. */ @@ -513,6 +566,9 @@ REG_OP(Upsample) *@attention Constraints:\n *-The implementation for FractionalMaxPoolGrad on Ascend uses AICPU, with bad performance.\n + +*@par Third-party framework compatibility +*@li compatible with tensorflow FractionalMaxPoolGrad operator. */ REG_OP(FractionalMaxPoolGrad) .INPUT(orig_input, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) @@ -547,6 +603,9 @@ REG_OP(FractionalMaxPoolGrad) *@attention Constraints:\n *-The implementation for FractionalAvgPool on Ascend uses AICPU, with bad performance.\n + +*@par Third-party framework compatibility +*@li compatible with tensorflow FractionalAvgPool operator. */ REG_OP(FractionalAvgPool) .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) @@ -584,6 +643,9 @@ REG_OP(FractionalAvgPool) *@attention Constraints:\n *-The implementation for FractionalMaxPool on Ascend uses AICPU, with bad performance.\n + +*@par Third-party framework compatibility +*@li compatible with tensorflow FractionalMaxPool operator. */ REG_OP(FractionalMaxPool) .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) @@ -616,6 +678,8 @@ REG_OP(FractionalMaxPool) *@attention Constraints:\n *-The implementation for NthElement on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow NthElement operator.
*/ REG_OP(NthElement) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, @@ -646,6 +710,8 @@ REG_OP(NthElement) *@attention Constraints:\n *-The implementation for FractionalAvgPoolGrad on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow FractionalAvgPoolGrad operator. */ REG_OP(FractionalAvgPoolGrad) .INPUT(orig_input_tensor_shape, TensorType({DT_INT64})) @@ -674,6 +740,8 @@ REG_OP(FractionalAvgPoolGrad) *@attention Constraints:\n *-The implementation for DataFormatVecPermute on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow DataFormatVecPermute operator. */ REG_OP(DataFormatVecPermute) .INPUT(x, TensorType({ DT_INT32, DT_INT64 })) diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index ff93f9fa..17233386 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -20,13 +20,13 @@ #include "graph/operator_reg.h" namespace ge { /** -*@brief Updates "var" according to the AdaMax algorithm.\n +*@brief Updates "var" according to the AdaMax algorithm. * t-1 mean previous period. * m_t <- beta1 * m{t-1} + (1 - beta1) * grad\n * v_t <- max(beta2 * v{t-1}, abs(grad))\n * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon) * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -43,7 +43,7 @@ namespace ge { *@li epsilon: A scalar. Has the same type as "var". *@li grad: A tensor for the gradient. Has the same type as "var". * -*@par Attributes:\n +*@par Attributes: * use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", "ms", and "mom" tensors is protected * by a lock; otherwise the behavior is undefined, but may exhibit less @@ -52,6 +52,9 @@ namespace ge { *@par Outputs: * var: A mutable tensor. 
Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdaMax. +* */ REG_OP(ApplyAdaMax) .INPUT(var, TensorType::NumberType()) @@ -68,13 +71,13 @@ REG_OP(ApplyAdaMax) .OP_END_FACTORY_REG(ApplyAdaMax) /** -*@brief Updates "var" according to the AdaMax algorithm.\n +*@brief Updates "var" according to the AdaMax algorithm. * t-1 mean previous period. * m_t <- beta1 * m{t-1} + (1 - beta1) * grad\n * v_t <- max(beta2 * v{t-1}, abs(grad))\n * var <- var - lr / (1 - beta1^t) * m_t / (v_t + epsilon) * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -91,15 +94,19 @@ REG_OP(ApplyAdaMax) *@li epsilon: A scalar. Has the same type as "var". *@li grad: A tensor for the gradient. Has the same type as "var". * -*@par Attributes:\n +*@par Attributes: * use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", "ms", and "mom" tensors is protected * by a lock; otherwise the behavior is undefined, but may exhibit less * contention. * *@par Outputs: -* var: A mutable tensor. Has the same type as input "var". +*@li var: A mutable tensor. Has the same type as input "var". +*@li m: A mutable tensor. Has the same type as input "m". +*@li v: A mutable tensor. Has the same type as input "v". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdaMax. * */ REG_OP(ApplyAdaMaxD) @@ -136,6 +143,8 @@ REG_OP(ApplyAdaMaxD) *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyAdagrad. */ REG_OP(SparseApplyAdagrad) .INPUT(var, TensorType({DT_FLOAT})) @@ -167,6 +176,8 @@ REG_OP(SparseApplyAdagrad) *@li var: A Tensor. Has the same type and format as input "var". *@li accum: A Tensor. Has the same type and format as input "var". 
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyAdagrad. */ REG_OP(SparseApplyAdagradD) .INPUT(var, TensorType({DT_FLOAT})) @@ -199,6 +210,8 @@ REG_OP(SparseApplyAdagradD) *@par Outputs: *var: A Tensor. Has the same type and format as input "var". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseApplyAdagradV2. */ REG_OP(SparseApplyAdagradV2) .INPUT(var, TensorType({DT_FLOAT})) @@ -232,6 +245,8 @@ REG_OP(SparseApplyAdagradV2) *@li var: A Tensor. Has the same type and format as input "var". *@li accum: A Tensor. Has the same type and format as input "accum". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseApplyAdagradV2. */ REG_OP(SparseApplyAdagradV2D) .INPUT(var, TensorType({DT_FLOAT})) @@ -248,12 +263,12 @@ REG_OP(SparseApplyAdagradV2D) /** *@brief Updates "var" according to the momentum scheme. Set use_nesterov = True if you -* want to use Nesterov momentum.\n +* want to use Nesterov momentum. * computing process: \n * accum = accum * momentum + grad\n * var -= lr * accum * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -269,13 +284,16 @@ REG_OP(SparseApplyAdagradV2D) * var - lr * momentum * accum, so in the end, the var you get is actually * var - lr * momentum * accum. * -*@li use_locking: An optional bool. Defaults to "False".\n +*@li use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", "ms", and "mom" tensors is protected by a lock; * otherwise the behavior is undefined, but may exhibit less contention. * *@par Outputs: * var: A mutable tensor. Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyMomentum. +* */ REG_OP(ApplyMomentum) @@ -302,12 +320,12 @@ REG_OP(ApplyMomentumCCE) /** *@brief Updates "var" according to the momentum scheme. 
Set use_nesterov = True if you -* want to use Nesterov momentum.\n +* want to use Nesterov momentum. * computing process: \n * accum = accum * momentum + grad\n * var -= lr * accum * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -323,13 +341,15 @@ REG_OP(ApplyMomentumCCE) * var - lr * momentum * accum, so in the end, the var you get is actually * var - lr * momentum * accum. * -*@li use_locking: An optional bool. Defaults to "False".\n +*@li use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", "ms", and "mom" tensors is protected by a lock; * otherwise the behavior is undefined, but may exhibit less contention. * *@par Outputs: * var: A mutable tensor. Has the same type as input "var". * accum: A mutable tensor. Has the same type as input "accum". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyMomentum. * */ @@ -377,6 +397,8 @@ REG_OP(ApplyMomentumD) *@attention Constraints: * The input tensors must have the same shape. * +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. * */ REG_OP(ApplyKerasMomentum) @@ -425,6 +447,8 @@ REG_OP(ApplyKerasMomentum) *@attention Constraints: * The input tensors must have the same shape. * +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyKerasMomentum. * */ REG_OP(ApplyKerasMomentumD) @@ -483,6 +507,8 @@ REG_OP(ApplyKerasMomentumD) *@attention Constraints: * The input tensors must have the same shape. * +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyAdamWithAmsgrad. * */ REG_OP(ApplyAdamWithAmsgradD) @@ -548,6 +574,8 @@ REG_OP(ApplyAdamWithAmsgradD) *@attention Constraints: * The input tensors must have the same shape.
* +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ResourceApplyAdamWithAmsgrad. * */ REG_OP(ApplyAdamWithAmsgrad) @@ -568,13 +596,13 @@ REG_OP(ApplyAdamWithAmsgrad) /** -*@brief Updates "var" according to the AddSign update.\n +*@brief Updates "var" according to the PowerSign update. * t-1 mean previous period. * m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n * update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n * var <- var - lr * update * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -596,6 +624,9 @@ REG_OP(ApplyAdamWithAmsgrad) *@par Outputs: * var: A mutable tensor. Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyPowerSign. +* */ REG_OP(ApplyPowerSign) .INPUT(var, TensorType::NumberType()) @@ -610,13 +641,13 @@ REG_OP(ApplyPowerSign) .OP_END_FACTORY_REG(ApplyPowerSign) /** -*@brief Updates "var" according to the AddSign update.\n +*@brief Updates "var" according to the PowerSign update. * t-1 mean previous period. * m_t <- beta1 * m_{t-1} + (1 - beta1) * grad\n * update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad\n * var <- var - lr * update * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -639,6 +670,8 @@ REG_OP(ApplyPowerSign) *@li var: A mutable tensor. Has the same type as input "var". *@li m: A mutable tensor. Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyPowerSign. * */ REG_OP(ApplyPowerSignD) @@ -678,6 +711,9 @@ REG_OP(ApplyPowerSignD) *@par Outputs: * var: A mutable tensor. Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyProximalGradientDescent.
+* */ REG_OP(ApplyProximalGradientDescent) .INPUT(var, TensorType::NumberType()) @@ -713,6 +749,9 @@ REG_OP(ApplyProximalGradientDescent) *@par Outputs: *var: A mutable Tensor. Has the same type as "var". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAddSign. */ REG_OP(ApplyAddSign) .INPUT(var, TensorType::NumberType()) @@ -752,6 +791,8 @@ REG_OP(ApplyAddSign) *@li var: A mutable Tensor. Has the same type as "var". *@li m: A mutable Tensor. Has the same type as "m". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAddSign. */ REG_OP(ApplyAddSignD) .INPUT(var, TensorType::NumberType()) @@ -767,7 +808,7 @@ REG_OP(ApplyAddSignD) .OP_END_FACTORY_REG(ApplyAddSignD) /** -*@brief Updates "var" according to the centered RMSProp algorithm.\n +*@brief Updates "var" according to the centered RMSProp algorithm. * The centered RMSProp algorithm uses an estimate of the centered second moment * (i.e., the variance) for normalization, as opposed to regular RMSProp, which * uses the (uncentered) second moment. This often helps with training, but is @@ -779,7 +820,7 @@ REG_OP(ApplyAddSignD) * mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n * var <- var - mom\n * -*@attention Constraints:\n +*@attention Constraints: *@li in dense implementation of this algorithm, mg, ms, and mom will * update even if the grad is zero, but in this sparse implementation, mg, ms, * and mom will not update in iterations during which the grad is zero. @@ -808,6 +849,9 @@ REG_OP(ApplyAddSignD) *@par Outputs: * var: A mutable tensor. Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyCenteredRMSProp. 
+* */ REG_OP(ApplyCenteredRMSProp) .INPUT(var, TensorType::NumberType()) @@ -824,7 +868,7 @@ REG_OP(ApplyCenteredRMSProp) .OP_END_FACTORY_REG(ApplyCenteredRMSProp) /** -*@brief Updates "var" according to the centered RMSProp algorithm.\n +*@brief Updates "var" according to the centered RMSProp algorithm. * The centered RMSProp algorithm uses an estimate of the centered second moment * (i.e., the variance) for normalization, as opposed to regular RMSProp, which * uses the (uncentered) second moment. This often helps with training, but is @@ -836,7 +880,7 @@ REG_OP(ApplyCenteredRMSProp) * mom <- momentum * mom{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)\n * var <- var - mom\n * -*@attention Constraints:\n +*@attention Constraints: *@li in dense implementation of this algorithm, mg, ms, and mom will * update even if the grad is zero, but in this sparse implementation, mg, ms, * and mom will not update in iterations during which the grad is zero. @@ -868,6 +912,8 @@ REG_OP(ApplyCenteredRMSProp) *@li ms: A mutable Tensor. Has the same type as "ms". *@li mom: A mutable Tensor. Has the same type as "mom". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyCenteredRMSProp. * */ REG_OP(ApplyCenteredRMSPropD) @@ -888,10 +934,10 @@ REG_OP(ApplyCenteredRMSPropD) .OP_END_FACTORY_REG(ApplyCenteredRMSPropD) /** -*@brief Updates "var" by subtracting 'alpha' * 'delta' from it.\n +*@brief Updates "var" by subtracting 'alpha' * 'delta' from it. * var -= delta * alpha * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -901,13 +947,16 @@ REG_OP(ApplyCenteredRMSPropD) * *@par Attributes: * use_locking: An optional bool. Defaults to "False". -* If "True", updating of the "var", "ms", and "mom" tensors is protected +* If "True", updating of the "var" tensor is protected * by a lock; otherwise the behavior is undefined, but may exhibit less * contention.
* *@par Outputs: * var: A mutable tensor. Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyGradientDescent. +* */ REG_OP(ApplyGradientDescent) .INPUT(var, TensorType::NumberType()) @@ -918,11 +967,11 @@ REG_OP(ApplyGradientDescent) .OP_END_FACTORY_REG(ApplyGradientDescent) /** -*@brief Updates "var" according to the adagrad scheme.\n +*@brief Updates "var" according to the adagrad scheme. * accum += grad * grad\n * var -= lr * grad * (1 / sqrt(accum)) * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -933,7 +982,8 @@ REG_OP(ApplyGradientDescent) *@li grad: A tensor for the gradient. Has the same type as "var". * *@par Attributes: -* use_locking: An optional bool. Defaults to "False". +*@li update_slots: An optional bool. Defaults to "True". If "True", the calculation will be different from that with "False". +*@li use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", "ms", and "mom" tensors is protected * by a lock; otherwise the behavior is undefined, but may exhibit less * contention. @@ -941,6 +991,9 @@ REG_OP(ApplyGradientDescent) *@par Outputs: * var: A mutable tensor. Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdagrad. +* */ REG_OP(ApplyAdagrad) .INPUT(var, TensorType::NumberType()) @@ -953,11 +1006,11 @@ REG_OP(ApplyAdagrad) .OP_END_FACTORY_REG(ApplyAdagrad) /** -*@brief Updates "var" according to the adagrad scheme.\n +*@brief Updates "var" according to the adagrad scheme. * accum += grad * grad\n * var -= lr * grad * (1 / sqrt(accum)) * -*@attention Constraints:\n +*@attention Constraints: * the input tensors must have the same shape. * *@par Inputs: @@ -968,7 +1021,8 @@ REG_OP(ApplyAdagrad) *@li grad: A tensor for the gradient. Has the same type as "var". * *@par Attributes: -* use_locking: An optional bool.
Defaults to "False". +*@li update_slots: An optional bool. Defaults to "True". If "True", the calculation will be different from that with "False". +*@li use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", "ms", and "mom" tensors is protected * by a lock; otherwise the behavior is undefined, but may exhibit less * contention. @@ -977,6 +1031,8 @@ REG_OP(ApplyAdagrad) *@li var: A mutable tensor. Has the same type as input "var". *@li accum: A mutable tensor. Has the same type as input "var". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdagrad. * */ REG_OP(ApplyAdagradD) @@ -1019,6 +1075,9 @@ REG_OP(ApplyAdagradD) * @attention Constraints: * The input tensors must have the same shape. * +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAdagrad. +* */ REG_OP(ApplyAdagradV2) .INPUT(var, TensorType::NumberType()) @@ -1061,6 +1120,9 @@ REG_OP(ApplyAdagradV2) * @attention Constraints: * The input tensors must have the same shape. * +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAdagrad. +* */ REG_OP(ApplyAdagradV2D) .INPUT(var, TensorType::NumberType()) @@ -1103,6 +1165,9 @@ REG_OP(ApplyAdagradV2D) *@par Outputs: *var: A mutable Tensor. Has the same type as "var". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdagradDA. */ REG_OP(ApplyAdagradDA) .INPUT(var, TensorType::NumberType()) @@ -1149,6 +1214,8 @@ REG_OP(ApplyAdagradDA) *gradient_accumulator: A mutable Tensor. Has the same type as "var". *gradient_squared_accumulator: A mutable Tensor. Has the same type as "var". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdagradDA. */ REG_OP(ApplyAdagradDAD) .INPUT(var, TensorType::NumberType()) @@ -1176,12 +1243,15 @@ REG_OP(ApplyAdagradDAD) * *@par Attributes: *@li src_format: An optional string. Defaults to NHWC.
-* source data format. +* source data format. Must be of length 4. *@li dst_format: An optional string. Defaults to NCHW. -* destination data format. +* destination data format. Must be of length 4. * *@par Outputs: -* y: A tensor. Has the same type as "x". +* y: A tensor. Has the same type as "x". Must be in the range [0, 4). +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DataFormatDimMap. * */ REG_OP(DataFormatDimMap) @@ -1223,6 +1293,9 @@ REG_OP(DataFormatDimMap) * parameters: A mutable tensor same as input "parameters". * @see ApplyMomentum() + +* @par Third-party framework compatibility +* @li Compatible with the PyTorch operator SGD. */ REG_OP(SGD) .INPUT(parameters, TensorType(DT_FLOAT, DT_FLOAT16)) @@ -1238,7 +1311,7 @@ REG_OP(SGD) .OP_END_FACTORY_REG(SGD) /** -* @brief Updates "var" according to the RMSProp algorithm.\n +* @brief Updates "var" according to the RMSProp algorithm. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n @@ -1246,7 +1319,7 @@ REG_OP(SGD) * var <- var - mom\n * * @par Inputs: -* @li var: A mutable tensor. Must be one of the data types defined in\n +* @li var: A mutable tensor. Must be one of the data types defined in * TensorType::NumberType(). Should be from a Variable(). * @li ms: A mutable tensor. Must have the same type as "var". Should be from a * Variable(). @@ -1259,18 +1332,21 @@ REG_OP(SGD) * @li grad: A tensor, specifying the gradient. Must have the same type as "var". * * @par Attributes: -* use_locking: An optional "bool". Defaults to "False".
If "True", updating of +* the "var", "ms", and "mom" tensors will be protected by a lock; otherwise the * behavior is undefined, but may exhibit less contention. * * @par Outputs: * var: A mutable tensor. Has the same type as input "var". * * @attention Constraints: -* @li Note that in dense implementation of this algorithm, "ms" and "mom" will \n -* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom" \n +* @li Note that in dense implementation of this algorithm, "ms" and "mom" will +* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom" * will not update in iterations during which "grad" is 0. * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. +* +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator ApplyRMSProp. */ REG_OP(ApplyRMSProp) .INPUT(var, TensorType::NumberType()) @@ -1287,7 +1363,7 @@ REG_OP(ApplyRMSProp) /** * @brief Updates "var" according to the RMSProp algorithm, a const input will be -* considered as an attribute.\n +* considered as an attribute. * mean_square = decay * mean_square + (1-decay) * gradient ** 2\n * Delta = learning_rate * gradient / sqrt(mean_square + epsilon)\n * ms <- rho * ms_{t-1} + (1-rho) * grad * grad\n @@ -1295,7 +1371,7 @@ REG_OP(ApplyRMSProp) * var <- var - mom * * @par Inputs: -* @li var: A mutable tensor. Must be one of the data types defined in\n +* @li var: A mutable tensor. Must be one of the data types defined in * TensorType::NumberType(). Should be from a Variable(). * @li ms: A mutable tensor. Must have the same type as "var". Should be from a * Variable(). @@ -1305,8 +1381,8 @@ REG_OP(ApplyRMSProp) * @li grad: A tensor, specifying the gradient. Must have the same type as "var". * * @par Attributes: -* @li use_locking: An optional "bool". Defaults to "False". If "True", updating\n -* of the "var", "ms", and "mom" tensors will be protected by a lock; \n +* @li use_locking: An optional "bool". 
Defaults to "False". If "True", updating +* of the "var", "ms", and "mom" tensors will be protected by a lock; * otherwise the behavior is undefined, but may exhibit less contention. * @li rho: A required scalar. Must have the same type as "var". * @li momentum: A required scalar. Must have the same type as "var". @@ -1316,10 +1392,13 @@ REG_OP(ApplyRMSProp) * var: A mutable tensor. Must have the same type as input "var". * * @attention Constraints: -* @li Note that in dense implementation of this algorithm, "ms" and "mom" will\n -* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom"\n +* @li Note that in dense implementation of this algorithm, "ms" and "mom" will +* update even if "grad" is 0, but in this sparse implementation, "ms" and "mom" * will not update in iterations during which "grad" is 0. * @li The input tensors "var", "ms", "mom" and "grad" must have the same shape. +* +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator ApplyRMSProp. */ REG_OP(ApplyRMSPropD) .INPUT(var, TensorType::NumberType()) @@ -1353,9 +1432,10 @@ REG_OP(ApplyRMSPropD) *use_locking: An optional bool. Defaults to "False". If "True", updating of the "var" and "accum" *tensors will be protected by a lock; otherwise the behavior is undefined, but may exhibit less *contention. *@par Outputs: -* @li var: A mutable tensor. Must have the same type as input "var". -* @li ms: A mutable tensor. Must have the same type as input "ms". -* @li mom: A mutable tensor. Must have the same type as input "mom". +*var: A mutable tensor. Must have the same type as input "var". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyProximalAdagrad. */ REG_OP(ApplyProximalAdagrad) .INPUT(var, TensorType::NumberType()) @@ -1388,6 +1468,8 @@ REG_OP(ApplyProximalAdagrad) * @li var: A mutable Tensor. Has the same type as "var". * @li accum: A mutable Tensor. Has the same type as "var". 
+*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyProximalAdagradD. */ REG_OP(ApplyProximalAdagradD) .INPUT(var, TensorType::NumberType()) @@ -1402,34 +1484,37 @@ REG_OP(ApplyProximalAdagradD) .OP_END_FACTORY_REG(ApplyProximalAdagradD) /** -*@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm.\ n +*@brief Updates entries in 'var' and 'accum' according to the Proximal Adagrad algorithm. * Compared with op ApplyProximalAdagrad, an additional index tensor is input, * Only the indices into the first dimensions of "var" and "accum" are updated. *@par Inputs: * Seven inputs, including:\n -* @li var: A mutable Tensor.\n +* @li var: A mutable Tensor. * TensorType::NumberType(). Should be a Variable Tensor. -* @li accum: A mutable Tensor of the same type as "var".\n +* @li accum: A mutable Tensor of the same type as "var". * Should be a Variable Tensor. -* @li lr: A Tensor of the same type as "var".\n +* @li lr: A Tensor of the same type as "var". * Scaling factor. Must be a scalar. -* @li l1: A Tensor of the same type as "var".\n +* @li l1: A Tensor of the same type as "var". * L1 regulariation. Must be a scalar. -* @li l2: A Tensor of the same type as "var".\n +* @li l2: A Tensor of the same type as "var". * L2 regulariation. Must be a scalar. -* @li grad: A Tensor. Has the same type as "var". \n +* @li grad: A Tensor. Has the same type as "var". * The gradient. -* @li indices: A vector of indices into the first dimension of "var" and "accum".\n +* @li indices: A vector of indices into the first dimension of "var" and "accum". * TensorType::IndexNumberType(). *@par Attributes: *use_locking: An optional bool. Defaults to "False".\n -* If "True", updating of the var and accum tensors will be protected by a lock; \n +* If "True", updating of the var and accum tensors will be protected by a lock; \n * If "False", the behavior is undefined, but may exhibit less contention. 
*@par Outputs: *var: A mutable Tensor. Has the same type as "var". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseApplyProximalAdagrad. */ REG_OP(SparseApplyProximalAdagrad) .INPUT(var, TensorType::NumberType()) @@ -1474,6 +1559,8 @@ REG_OP(SparseApplyProximalAdagrad) *@li var: A mutable Tensor. Has the same type as "var". *@li accum: A mutable Tensor. Has the same type as "var". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseApplyProximalAdagrad. */ REG_OP(SparseApplyProximalAdagradD) .INPUT(var, TensorType::NumberType()) @@ -1513,6 +1600,9 @@ REG_OP(SparseApplyProximalAdagradD) *@par Outputs: *var: A mutable Tensor. Has the same type as "var". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyFtrl. */ REG_OP(ApplyFtrl) .INPUT(var, TensorType::NumberType()) @@ -1555,6 +1645,8 @@ REG_OP(ApplyFtrl) *@li accum: A mutable Tensor. Has the same type as "accum". *@li linear: A mutable Tensor. Has the same type as "linear". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyFtrl. */ REG_OP(ApplyFtrlD) .INPUT(var, TensorType::NumberType()) @@ -1598,6 +1690,8 @@ REG_OP(ApplyFtrlD) *@par Outputs: *var: A mutable Tensor. Has the same type as "var". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyFtrlV2. */ REG_OP(ApplyFtrlV2) .INPUT(var, TensorType::NumberType()) @@ -1642,6 +1736,8 @@ REG_OP(ApplyFtrlV2) *accum: A mutable Tensor. Has the same type as "accum". *linear: A mutable Tensor. Has the same type as "linear". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyFtrlV2. */ REG_OP(ApplyFtrlV2D) .INPUT(var, TensorType::NumberType()) @@ -1660,13 +1756,13 @@ REG_OP(ApplyFtrlV2D) .OP_END_FACTORY_REG(ApplyFtrlV2D) /** -*@brief Updates "var" according to the Adam algorithm.\n +*@brief Updates "var" according to the Adam algorithm. 
* lr_t <- text{learning\_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)\n * m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g\n * v_t <- max(beta2 * v{t-1}, abs(g))\n * variable <- variable - lr_t * m_t / (sqrt{v_t} + epsilon) * -*@attention Constraints:\n +*@attention Constraints: * *The input tensors must have the same shape.* * *@par Inputs: @@ -1684,7 +1780,7 @@ REG_OP(ApplyFtrlV2D) *@li epsilon: A scalar of the same type as "var". *@li grad: A Tensor of the same type as "var", for the gradient. * -*@par Attributes:\n +*@par Attributes: *@li use_locking: An optional bool. Defaults to "False". * If "True", updating of the "var", m", and "v" tensors will be protected * by a lock; otherwise the behavior is undefined, but may exhibit less @@ -1694,6 +1790,9 @@ REG_OP(ApplyFtrlV2D) * *@par Outputs: * var: A mutable Tensor. Has the same type as intput "var". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdam. */ REG_OP(ApplyAdam) .INPUT(var, TensorType::NumberType()) @@ -1712,13 +1811,13 @@ REG_OP(ApplyAdam) .OP_END_FACTORY_REG(ApplyAdam) /** -*@brief Updates "var" according to the Adam algorithm.\n +*@brief Updates "var" according to the Adam algorithm. * lr_t <- text{learning\_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)\n * m_t <- beta_1 * m_{t-1} + (1 - beta_1) * g\n * v_t <- max(beta2 * v{t-1}, abs(g))\n * variable <- variable - lr_t * m_t / (sqrt{v_t} + epsilon) * -*@attention Constraints:\n +*@attention Constraints: * *The input tensors must have the same shape.* * *@par Inputs: @@ -1736,7 +1835,7 @@ REG_OP(ApplyAdam) *@li epsilon: A scalar of the same type as "var". *@li grad: A Tensor of the same type as "var", for the gradient. * -*@par Attributes:\n +*@par Attributes: *@li use_locking: An optional bool. Defaults to "False". 
* If "True", updating of the "var", m", and "v" tensors will be protected * by a lock; otherwise the behavior is undefined, but may exhibit less @@ -1748,6 +1847,9 @@ REG_OP(ApplyAdam) *@li var: A mutable tensor. Has the same type as input "var". *@li m: A mutable tensor. Has the same type as input "m". *@li v: A mutable tensor. Has the same type as input "v". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ApplyAdam. */ REG_OP(ApplyAdamD) .INPUT(var, TensorType::NumberType()) @@ -1791,6 +1893,9 @@ REG_OP(ApplyAdamD) *@par Outputs: *var: A mutable Tensor. Has the same type as "var". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAdadelta. */ REG_OP(ApplyAdadelta) .INPUT(var, TensorType::NumberType()) @@ -1831,6 +1936,8 @@ REG_OP(ApplyAdadelta) *@li accum: A mutable Tensor. Has the same type as "var". *@li accum_update: A mutable Tensor. Has the same type as "var". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ApplyAdadelta. */ REG_OP(ApplyAdadeltaD) .INPUT(var, TensorType::NumberType()) @@ -2035,6 +2142,8 @@ REG_OP(LarsV2Update) * @par Outputs: * var: A Tensor. Has the same type and format as input "var". +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyFtrl. */ REG_OP(SparseApplyFtrl) .INPUT(var, TensorType({DT_FLOAT})) @@ -2079,6 +2188,8 @@ REG_OP(SparseApplyFtrl) * @li accum: A Tensor. Has the same type and format as input "accum". * @li linear: A Tensor. Has the same type and format as input "linear". +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyFtrl. */ REG_OP(SparseApplyFtrlD) .INPUT(var, TensorType({DT_FLOAT})) @@ -2125,6 +2236,8 @@ REG_OP(SparseApplyFtrlD) * @par Outputs: * var: A Tensor. Has the same type and format as input "var". 
+* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyFtrlV2. */ REG_OP(SparseApplyFtrlV2) .INPUT(var, TensorType({DT_FLOAT})) @@ -2172,6 +2285,8 @@ REG_OP(SparseApplyFtrlV2) * @li accum: A Tensor. Has the same type and format as input "accum". * @li linear: A Tensor. Has the same type and format as input "linear". +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyFtrlV2D. */ REG_OP(SparseApplyFtrlV2D) .INPUT(var, TensorType({DT_FLOAT})) @@ -2225,6 +2340,8 @@ REG_OP(SparseApplyFtrlV2D) * in iterations during which "grad" is 0. * @li The input tensors "var", "ms", and "mom" must have the same shape. * +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyRMSProp. */ REG_OP(SparseApplyRMSProp) .INPUT(var, TensorType::NumberType()) @@ -2326,6 +2443,8 @@ REG_OP(SparseApplyRMSPropD) * in iterations during which "grad" is 0. * @li The input tensors "var", "accum", and "accum_update" must have the same shape. * +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseApplyAdadelta. */ REG_OP(SparseApplyAdadelta) .INPUT(var, TensorType::NumberType()) diff --git a/third_party/fwkacllib/inc/ops/no_op.h b/third_party/fwkacllib/inc/ops/no_op.h index 55ed875b..61e187c4 100644 --- a/third_party/fwkacllib/inc/ops/no_op.h +++ b/third_party/fwkacllib/inc/ops/no_op.h @@ -24,6 +24,9 @@ namespace ge { /** *@brief Does nothing. Only useful as a placeholder for control edges. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator NoOp. */ REG_OP(NoOp) diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index 0e1c4b22..d38faf49 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -29,6 +29,8 @@ namespace ge { *@par Outputs: *y: A Tensor. 
Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Gelu */ REG_OP(Gelu) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -39,13 +41,15 @@ REG_OP(Gelu) *@brief Computes the gradient for the gelu of "x". *@par Inputs: -*Two inputs, including: +*Three inputs, including: * @li dy: A Tensor. Must be one of the following types: float16, float32 * @li x: A Tensor of the same type as "dy". * @li y: A Tensor of the same type as "dy". *@par Outputs: *z: A Tensor. Has the same type as "dy". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator GeluGrad */ REG_OP(GeluGrad) .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -65,6 +69,8 @@ REG_OP(GeluGrad) *@par Outputs: *z: A Tensor. Has the same type as "y". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator TanhGrad. */ REG_OP(TanhGrad) .INPUT(y, TensorType::UnaryDataType()) @@ -77,11 +83,13 @@ REG_OP(TanhGrad) *@par Inputs: *One input: -*x: A Tensor. Must be one of the following types: float16, float32, double, complex64, complex128, int32, int64 +*x: A Tensor. Must be one of the following types: float16, float32, complex64, complex128, int32, int64 *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Tanh. */ REG_OP(Tanh) .INPUT(x, TensorType::UnaryDataType()) @@ -98,6 +106,10 @@ REG_OP(Tanh) * @par Outputs: * y: A tensor. Has the same type as "x". * +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator Relu. +* @li Compatible with the Caffe operator ReLULayer. +* */ REG_OP(Relu) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, @@ -117,6 +129,9 @@ REG_OP(Relu) * @par Outputs: * y: A Tensor of type RealNumberType. + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Relu6. 
*/ REG_OP(Relu6) .INPUT(x, TensorType::RealNumberType()) @@ -135,6 +150,9 @@ REG_OP(Relu6) * @par Outputs: * y: A Tensor of type RealNumberType. + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Relu6. */ REG_OP(Relu6D) .INPUT(x, TensorType::RealNumberType()) @@ -152,6 +170,9 @@ REG_OP(Relu6D) * @par Outputs: * backprops: A Tensor of type RealNumberType. + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Relu6Grad. */ REG_OP(Relu6Grad) .INPUT(gradients, TensorType::RealNumberType()) @@ -169,6 +190,9 @@ REG_OP(Relu6Grad) * A Tensor. Has the same type as "x". * @see Relu() + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Sigmoid. */ REG_OP(Sigmoid) .INPUT(x, TensorType::UnaryDataType()) @@ -201,6 +225,8 @@ REG_OP(SigmoidGrad) *@par Outputs: *y: A tensor. Has the same type and format as input "x". +*@par Third-party framework compatibility +* Compatible with the Caffe operator BNLL. */ REG_OP(BNLL) .INPUT(x, TensorType::FloatingDataType()) @@ -217,6 +243,8 @@ REG_OP(BNLL) *@par Outputs: *y: The activations tensor. Has the same type and format as input "x" +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Softplus. */ REG_OP(Softplus) .INPUT(x, TensorType::FloatingDataType()) @@ -235,6 +263,8 @@ REG_OP(Softplus) *@par Outputs: *backprops: A Tensor. Has the same type and format as input "gradients". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SoftplusGrad. */ REG_OP(SoftplusGrad) .INPUT(gradients, TensorType::FloatingDataType()) @@ -252,6 +282,8 @@ REG_OP(SoftplusGrad) *@par Outputs: *y: The activations tensor. Has the same type and format as "x" +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Softsign. 
*/ REG_OP(Softsign) .INPUT(x, TensorType::FloatingDataType()) @@ -262,14 +294,17 @@ REG_OP(Softsign) *@brief Computes scaled exponential linear: scale * alpha * (exp(x) - 1). *@par Inputs: -* One input: \n -*x: A Tensor. Must be one of the following types: float16, float32, int32, int8. +* One input: +*x: A Tensor. Must be one of the following types: float16, float, double + * int32, int8. format:ND, NC1HWC0. *@par Outputs: -*y: A Tensor. Has the same type and format as input "x". +*y: A Tensor. Has the same type and format as input "x". format:ND, NC1HWC0. *@see Region() +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Selu. */ REG_OP(Selu) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE, @@ -283,8 +318,10 @@ REG_OP(Selu) *@par Inputs: * Two inputs, including: -*@li gradients: A Tensor. Must be one of the following types: float32, double, int32, int8, int16, int8, int64, uint16, float16, uint32, uint64 -*@li features: A Tensor. Must be one of the following types: float32, double, int32, int8, int16, int8, int64, uint16, float16, uint32, uint64 +*@li gradients: A Tensor. Must be one of the following types: float32, double, + * int32, int8, int16, int64, uint16, float16, uint32, uint64 +*@li features: A Tensor. Must be one of the following types: float32, double, + * int32, int8, int16, int64, uint16, float16, uint32, uint64 *@par Outputs: *backprops: A Tensor. Must have the same type as"gradients". @@ -294,6 +331,8 @@ REG_OP(Selu) *@see Relu +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ReluGrad. */ REG_OP(ReluGrad) .INPUT(gradients, TensorType::RealNumberType()) @@ -316,6 +355,9 @@ REG_OP(ReluGrad) * The corresponding Relu operator needs to be called before using this operator on the network. *@see Relu + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator ReluGradV2. 
*/ REG_OP(ReluGradV2) .INPUT(gradients, TensorType::RealNumberType()) @@ -337,6 +379,10 @@ REG_OP(ReluGradV2) *@par Outputs: *@li y: A tensor. Has the same type as "x". *@li mask: A tensor of type uint8. +* +*@par Third-party framework compatibility +* Incompatible with TensorFlow or Caffe. +* */ REG_OP(ReluV2) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE, DT_INT8, DT_INT32, DT_INT16, DT_INT64, DT_UINT8, DT_UINT16, DT_QINT8})) @@ -355,6 +401,8 @@ REG_OP(ReluV2) *@par Outputs: *y: An activated Tensor. Has the same dimensions with "x". +*@par Third-party framework compatibility +* Compatible with PyTorch and Caffe operator PReLU. */ REG_OP(PRelu) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) @@ -375,6 +423,8 @@ REG_OP(PRelu) *@li dx: Reverse gradient of "features". Has the same dimensions and type as "features". *@li da: Reverse gradient of "weight". Has the same dimensions and type as "features". +*@par Third-party framework compatibility +* Compatible with PyTorch operator PReluGrad. */ REG_OP(PReluGrad) .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -385,22 +435,26 @@ REG_OP(PReluGrad) .OP_END_FACTORY_REG(PReluGrad) /** -*@brief Activation function fused from sigmoid and ReLU, with soft saturation on the left and no saturation on the right. +*@brief Activation function fused from sigmoid and ReLU, with soft saturation +* on the left and no saturation on the right. *@par Inputs: -*x: A float16 or float32, for the input data type. +*x: A float16, float32 or double, for the input data type. *@par Attributes: *alpha: A float. Defines at which negative value the ELU saturates. Defaults to "1.0". *@par Outputs: -*y: A float16 or float32, for the normalized result. +*y: A float16, float32 or double, for the normalized result. *@attention Constraints: *@li The input is of type float16 or float32. 
*@par Multiple batches supported or not *Supported +*@par Third-party framework compatibility +*@li Compatible with Tensorflow's Elu operator +*@li Compatible with Caffe's ELULayer operator * *@since V100R001C33 */ @@ -422,6 +476,9 @@ REG_OP(Elu) *@par Outputs: * y: A tensor. Has the same type as "grads". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator EluGrad. +* */ REG_OP(EluGrad) .INPUT(grads, TensorType::FloatingDataType()) @@ -441,6 +498,8 @@ REG_OP(EluGrad) * *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the Caffe operator ReLU. */ REG_OP(LeakyRelu) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_DOUBLE})) @@ -461,6 +520,9 @@ REG_OP(LeakyRelu) *@par Outputs: *backprops: A Tensor. Has the same type as "gradients". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator LeakyReluGrad. */ REG_OP(LeakyReluGrad) .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 346c72a1..7e9c65f4 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -21,17 +21,24 @@ namespace ge { /** -*@brief Creates a tensor filled with a scalar value.\n +*@brief Creates a tensor filled with a scalar value. * This operation creates a tensor of shape "dims" and fills it with "value". * *@par Inputs: *@li dims: A 1D tensor of types int32 or int64. Represents the shape of the output tensor. *@li value: A 0D scalar. Specifies the value to fill the returned tensor. +* Must be one of the following types: +* float16, float32, double, int32, uint8, int16, int8, complex64, int64, +* qint8, quint8, qint32, uint16, complex128, uint32, uint64. * *@par Outputs: * y: A tensor. Has the same type as "value". * +*@par Third-party framework compatibility +*@li Compatible with the TensorFlow operator Fill. 
+*@li Compatible with the Caffe operator Filler. +* */ REG_OP(Fill) .INPUT(dims, TensorType::IndexNumberType()) @@ -40,11 +47,13 @@ REG_OP(Fill) .OP_END_FACTORY_REG(Fill) /** -*@brief Creates a tensor filled with a scalar value.\n +*@brief Creates a tensor filled with a scalar value. * This operation creates a tensor of shape "dims" and fills it with "value". * *@par Inputs: -* value: A 0D scalar for the value to fill the returned tensor. +* value: A 0D scalar for the value to fill the returned tensor. Must be one of +* the following types: +* float16, float32, uint8, int8, int16, int32, int64, quint8, qint8, qint32 * *@par Attributes: * dims: A tensor. Must be one of the following types:"int32" @@ -65,20 +74,24 @@ REG_OP(FillD) .OP_END_FACTORY_REG(FillD) /** -*@brief Broadcasts an array for a compatible shape.\n -* Broadcasting is the process of making arrays to have compatible shapes -* for arithmetic operations. Two shapes are compatible if for each +*@brief Broadcasts an array for a compatible shape. +* Broadcasting is the process of making arrays to have compatible shapes +* for arithmetic operations. Two shapes are compatible if for each * dimension pair they are either equal or one of them is one. When trying * to broadcast a Tensor to a shape, it starts with the trailing dimensions, * and works its way forward. * *@par Inputs: *@li x: A tensor. -*@li shape: A tensor of type int32 or int64. +*@li shape: A tensor of type int32. * A 1D tensor of type int32, for the shape of the desired output. * *@par Outputs: * y: A tensor. Has the same type as "x". +* +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BroadcastTo. +* */ REG_OP(BroadcastTo) .INPUT(x, TensorType::BasicType()) @@ -87,9 +100,9 @@ REG_OP(BroadcastTo) .OP_END_FACTORY_REG(BroadcastTo) /** -*@brief Broadcasts an array for a compatible shape.\n -* Broadcasting is the process of making arrays to have compatible shapes -* for arithmetic operations. 
Two shapes are compatible if for each +*@brief Broadcasts an array for a compatible shape. +* Broadcasting is the process of making arrays to have compatible shapes +* for arithmetic operations. Two shapes are compatible if for each * dimension pair they are either equal or one of them is one. When trying * to broadcast a Tensor to a shape, it starts with the trailing dimensions, * and works its way forward. @@ -104,6 +117,9 @@ REG_OP(BroadcastTo) *@par Outputs: * y: A tensor. Has the same type as "x". * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator BroadcastTo. +* */ REG_OP(BroadcastToD) .INPUT(x, TensorType::BasicType()) @@ -123,6 +139,9 @@ REG_OP(BroadcastToD) *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator Pad. */ REG_OP(Pad) .INPUT(x, TensorType::BasicType()) @@ -145,6 +164,9 @@ REG_OP(Pad) *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator Pad. */ REG_OP(PadD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_FLOAT})) @@ -160,10 +182,15 @@ REG_OP(PadD) * @li x: A mutable Tensor. Must be one of the following types: * float16, float32, int32. -* @li assist: A mutable Tensor of the same type as "x". +* @li assist: A mutable Tensor with rank k is at most 1, +* Has the same type as "x". *@par Outputs: *y: A mutable Tensor. Has the same type as "x". + +*@see Diag() +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Diag. */ REG_OP(DiagD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) @@ -176,11 +203,16 @@ REG_OP(DiagD) *@par Inputs: *One input, include: -* x: A mutable Tensor. Must be one of the following types: +* x: A mutable Tensor with rank k, where k is at most 1. Must be one of the +* following types: * float16, float32, double, int32, int64, complex64, complex128. 
*@par Outputs: *y: A mutable Tensor. Has the same type as "x". + +*@see DiagD() +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Diag. */ REG_OP(Diag) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, @@ -188,6 +220,26 @@ REG_OP(Diag) .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_COMPLEX64, DT_COMPLEX128})) .OP_END_FACTORY_REG(Diag) + +/** +*@brief Ascend Padding, pad the last dimension of input + +*@par Inputs: +*One input, include: +*x: Tensor which last dimension must be 1. For example: [624000, 1]. + +*@par Outputs: +*y: Padding the last dimension of x to padDimSize, [624000, padDimSize]. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Diag. +*/ +REG_OP(AscendPadding) + .INPUT(x, TensorType::BasicType()) + .OUTPUT(y, TensorType::BasicType()) + .ATTR(pad_dim_size, Int, 8) + .OP_END_FACTORY_REG(AscendPadding) + } // namespace ge #endif //GE_OP_PAD_OPS_H diff --git a/third_party/fwkacllib/inc/ops/parsing_ops.h b/third_party/fwkacllib/inc/ops/parsing_ops.h index a8d1a757..a8a3e7a1 100644 --- a/third_party/fwkacllib/inc/ops/parsing_ops.h +++ b/third_party/fwkacllib/inc/ops/parsing_ops.h @@ -38,6 +38,8 @@ namespace ge { *@attention Constraints:\n *-The implementation for StringToNumber on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow StringToNumber operator. */ REG_OP(StringToNumber) .INPUT(x, TensorType({DT_STRING})) diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index d9fe2540..4a4bd606 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -44,6 +44,9 @@ namespace ge { * @attention Constraints: * @li "input_min_range" and "input_max_range" have the same shapes. * @li "input_data" and "output_data" have the same shapes. 
+ +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Dequantize. */ REG_OP(Dequantize) .INPUT(x, TensorType(DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16)) @@ -68,6 +71,8 @@ REG_OP(Dequantize) *@par Outputs: *y: The quantized output tensor of type int8 and with format NC1HWC0. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(AscendQuant) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT32})) @@ -92,6 +97,8 @@ REG_OP(AscendQuant) *@par Outputs: *y: The dequantized output tensor of type float16 or float32 and with format NC1HWC0. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(AscendDequant) .INPUT(x, TensorType({DT_INT32})) diff --git a/third_party/fwkacllib/inc/ops/ragged_array_ops.h b/third_party/fwkacllib/inc/ops/ragged_array_ops.h index 4f3cf97e..2b8bba5f 100644 --- a/third_party/fwkacllib/inc/ops/ragged_array_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_array_ops.h @@ -41,6 +41,8 @@ namespace ge { *y:A Returns The `nested_row_splits` tensors that define the row-partitioning for the \n *returned RaggedTensor.The `flat_values` for the returned RaggedTensor. +*@par Third-party framework compatibility +* Compatible with tensorflow RaggedGather operator. */ REG_OP(RaggedGather) diff --git a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h index 7a42e4d9..82fd84b7 100644 --- a/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_conversion_ops.h @@ -40,6 +40,8 @@ int64, double, float, float16. *@li sparse_values: A Tensor. Has the same type as rt_dense_values. *@li sparse_dense_shape: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with TensorFlow operator RaggedTensorToSparse. 
*/ REG_OP(RaggedTensorToSparse) .DYNAMIC_INPUT(rt_nested_splits, TensorType({DT_INT32, DT_INT64})) diff --git a/third_party/fwkacllib/inc/ops/ragged_math_ops.h b/third_party/fwkacllib/inc/ops/ragged_math_ops.h index 80669f0f..e56c35a5 100644 --- a/third_party/fwkacllib/inc/ops/ragged_math_ops.h +++ b/third_party/fwkacllib/inc/ops/ragged_math_ops.h @@ -38,6 +38,8 @@ namespace ge { *The vector inputs must all have the same size. Scalar inputs are broadcast \n *to match the size of the vector inputs. +*@par Third-party framework compatibility +* Compatible with tensorflow RaggedRange operator. */ REG_OP(RaggedRange) diff --git a/third_party/fwkacllib/inc/ops/random_ops.h b/third_party/fwkacllib/inc/ops/random_ops.h index 41c1fff9..a35e8b3a 100644 --- a/third_party/fwkacllib/inc/ops/random_ops.h +++ b/third_party/fwkacllib/inc/ops/random_ops.h @@ -43,6 +43,8 @@ namespace ge { *@attention Constraints:\n *-The implementation for Multinomial on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow Multinomial operator. */ REG_OP(Multinomial) .INPUT(logits, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) @@ -75,6 +77,8 @@ REG_OP(Multinomial) *@attention Constraints:\n *-The implementation for ParameterizedTruncatedNormal on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow ParameterizedTruncatedNormal operator. */ REG_OP(ParameterizedTruncatedNormal) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) @@ -101,6 +105,8 @@ REG_OP(ParameterizedTruncatedNormal) *@attention Constraints:\n *-The implementation for RandomGammaGrad on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomGammaGrad operator. 
*/ REG_OP(RandomGammaGrad) .INPUT(alpha, TensorType({DT_FLOAT, DT_DOUBLE})) @@ -126,6 +132,8 @@ REG_OP(RandomGammaGrad) *@attention Constraints:\n *-The implementation for RandomGamma on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomGamma operator. */ REG_OP(RandomGamma) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) @@ -154,6 +162,8 @@ REG_OP(RandomGamma) *@attention Constraints:\n *-The implementation for RandomPoisson on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomPoisson operator. */ REG_OP(RandomPoisson) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) @@ -183,6 +193,8 @@ REG_OP(RandomPoisson) *@attention Constraints:\n *-The implementation for RandomShuffle on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomShuffle operator. */ REG_OP(RandomShuffle) .INPUT(x, TensorType({DT_INT64, DT_INT32, DT_UINT16, DT_INT16, @@ -213,6 +225,8 @@ REG_OP(RandomShuffle) *@attention Constraints:\n *-The implementation for RandomStandardNormal on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomStandardNormal operator. */ REG_OP(RandomStandardNormal) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) @@ -241,6 +255,8 @@ REG_OP(RandomStandardNormal) *@attention Constraints:\n *-The implementation for RandomUniformInt on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomUniformInt operator. 
*/ REG_OP(RandomUniformInt) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) @@ -269,6 +285,8 @@ REG_OP(RandomUniformInt) *@attention Constraints:\n *-The implementation for RandomUniform on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow RandomUniform operator. */ REG_OP(RandomUniform) .INPUT(shape, TensorType({DT_INT32, DT_INT64})) @@ -295,6 +313,8 @@ REG_OP(RandomUniform) *@attention Constraints:\n *-The implementation for TruncatedNormal on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow TruncatedNormal operator. */ REG_OP(TruncatedNormal) .INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) @@ -335,18 +355,21 @@ REG_OP(DropOutGenMask) /** *@brief Generates values in an interval. -*@par Inputs:\n +*@par Inputs: * Four ND inputs, including: -*@li input_assist: A 1D Tensor of type float32. -*@li input_start: A 1D Tensor of type float32, for the first entry in the range. -*@li input_stop: A 1D Tensor of type float32, for the last entry in the range. -*@li input_num: A 1D Tensor of type int32, for the common difference of the entries. +*@li assist: A 1D Tensor of type float32. +*@li start: A 1D Tensor of type float32, for the first entry in the range. +*@li stop: A 1D Tensor of type float32, for the last entry in the range. +*@li num: A 1D Tensor of type int32 or int64, for the common difference of the entries. -*@par Outputs:\n +*@par Outputs: *output_op: A 1D Tensor of type float32. -*@attention Constraints:\n +*@attention Constraints: * "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with an common difference of 1. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator lin_space. */ REG_OP(LinSpaceD) .INPUT(assist, TensorType({DT_FLOAT})) @@ -359,19 +382,20 @@ REG_OP(LinSpaceD) /** *@brief Generates values in an interval. 
-*@par Inputs:\n +*@par Inputs: * Four ND inputs, including: -*@li input_assist: A 1D Tensor of type float32. -*@li input_start: A 1D Tensor of type float32, for the first entry in the range. -*@li input_stop: A 1D Tensor of type float32, for the last entry in the range. -*@li input_num: A 1D Tensor of type int32, for the common difference of the entries. +*@li start: A 1D Tensor of type float32, for the first entry in the range. +*@li stop: A 1D Tensor of type float32, for the last entry in the range. +*@li num: A 1D Tensor of type int32 or int64, for the common difference of the entries. -*@par Outputs:\n +*@par Outputs: *output_op: A 1D Tensor of type float32. -*@attention Constraints:\n +*@attention Constraints: * "input_assist" is a sequence of "input_num" evenly-spaced values beginning at 0 with an common difference of 1. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator lin_space. */ REG_OP(LinSpace) .INPUT(start, TensorType({DT_FLOAT, DT_DOUBLE})) @@ -435,6 +459,8 @@ REG_OP(RandomChoiceWithMask) *@attention Constraints:\n *@li "group" must be greater than 0 and must evenly divide the channel dimension size. *@li The format of input "x" must be NCHW. +*@par Third-party framework compatibility +* Compatible with the Caffe operator ShuffleChannel. */ REG_OP(ShuffleChannel) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index a0f78291..8819d2d5 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -23,15 +23,17 @@ namespace ge { /** *@brief Performs reduced batch normalization. -*@par Inputs:\n +*@par Inputs: *x: A 5D Tensor of type float16 or float32, with format NC1HWC0. *@par Outputs: *@li sum: A 1D Tensor of type float32 for SUM reduced "x". *@li square_sum: A 1D Tensor of type float32 for SUMSQ reduced "x". 
-*@attention Constraints:\n -* This operator is a BatchNorm fusion operator for updating the moving averages for training. \n This operator is used in conjunction with BNTrainingUpdate. +*@attention Constraints: +* This operator is a BatchNorm fusion operator for updating the moving +* averages for training. \n +* This operator is used in conjunction with BNTrainingUpdate. */ REG_OP(BNTrainingReduce) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -44,19 +46,26 @@ REG_OP(BNTrainingReduce) *@par Inputs: * Seven inputs, including: \n -*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for the gradient. +*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for +* the gradient. *@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. -*@li diff_scale: A 5D Tensor of type float32, with format NC1HWC0, for the mean of "x". -*@li diff_offset: A 5D Tensor of type float32, with format NC1HWC0, for the variance of "x". +*@li diff_scale: A 5D Tensor of type float32, with format NC1HWC0, +* for the mean of "x". +*@li diff_offset: A 5D Tensor of type float32, with format NC1HWC0, +* for the variance of "x". *@li scale: A 5D Tensor of type float32, with format NC1HWC0. -*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, for the mean of "x". -*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, for the variance of "x". +*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, +* for the mean of "x". +*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, +* for the variance of "x". *@par Attributes: -*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x". +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x". *@par Outputs: -*y: A Tensor of type float16 or float32, with format NC1HWC0, for the offset of "x". 
+*y: A Tensor of type float16 or float32, with format NC1HWC0, for the offset +* of "x". *@attention Constraints: * The preceding layer of this operator must be BNTrainingUpdateGrad. @@ -78,21 +87,25 @@ REG_OP(BNTrainingReduceGrad) /** *@brief Performs reduced batch normalization. -*@par Inputs:\n +*@par Inputs: * Seven inputs, including: (NC1HWC0 supported) *@li x: A 5D Tensor of type float16 or float32. -*@li sum: A 1D Tensor of type float32 for the output of operator BNTrainingReduce. -*@li square_sum: A 1D Tensor of type float32 for the output of operator BNTrainingReduce. +*@li sum: A 1D Tensor of type float32 for the output of operator +* BNTrainingReduce. +*@li square_sum: A 1D Tensor of type float32 for the output of operator +* BNTrainingReduce. *@li scale: A 1D Tensor of type float32, for the scaling factor. *@li offset: A 1D Tensor of type float32, for the scaling offset. *@li mean: A 1D Tensor of type float32, for the updated mean. *@li variance: A 1D Tensor of type float32, for the updated variance. *@par Attributes: -*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero. -*@li factor: A required float32, specifying the weight for updating the mean and variance. +*@li epsilon: A required float32, specifying the small value added to variance +* to avoid dividing by zero. +*@li factor: A required float32, specifying the weight for updating the mean +* and variance. -*@par Outputs:\n +*@par Outputs: * Five outputs, including: (NC1HWC0 supported) *@li y: A 5D Tensor of type float16 or float32, for normalized "x". *@li mean: A 5D Tensor of type float32, for the updated mean. @@ -101,8 +114,11 @@ REG_OP(BNTrainingReduceGrad) *@li batch_variance: A 1D Tensor of type float32, for the variance of "x". *@attention Constraints: -*@li This operator is a BatchNorm fusion operator for updating the moving averages for training. \n This operator is used in conjunction with BNTrainingReduce. 
-*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*@li This operator is a BatchNorm fusion operator for updating the moving +* averages for training. \n +*This operator is used in conjunction with BNTrainingReduce. +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square +* root instruction. */ REG_OP(BNTrainingUpdate) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -133,13 +149,15 @@ REG_OP(BNTrainingUpdate) *@li variance: A 5D Tensor of type float32, for the variance. *@par Attributes: -*epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*epsilon: An optional float32, specifying the small value added to variance to +* avoid dividing by zero. Defaults to "0.0001". *@par Outputs:\n *y: A 5D Tensor of type float16 or float32 for the normalized "x". *@attention Constraints: -*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction. +*For Ascend 310, the result accuracy fails to reach 1‰ due to the square root +* instruction. */ REG_OP(BNInfer) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -207,17 +225,23 @@ REG_OP(BNTrainingUpdateV3) *@par Inputs: * Four inputs, including: \n -*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, for the gradient. +*@li grads: A 5D Tensor of type float16 or float32, with format NC1HWC0, +* for the gradient. *@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. -*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, for the mean of "x". -*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, for the variance of "x". +*@li batch_mean: A 5D Tensor of type float32, with format NC1HWC0, +* for the mean of "x". +*@li batch_variance: A 5D Tensor of type float32, with format NC1HWC0, +* for the variance of "x". *@par Attributes: -*epsilon: An optional float32. Defaults to "0.0001". 
A small float number added to the variance of "x". +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x". *@par Outputs: -*@li diff_scale: A Tensor of type float32, with format NC1HWC0, for the offset of "scale". -*@li diff_offset: A Tensor of type float32, with format NC1HWC0, for the offset of "offset". +*@li diff_scale: A Tensor of type float32, with format NC1HWC0, +* for the offset of "scale". +*@li diff_offset: A Tensor of type float32, with format NC1HWC0, +* for the offset of "offset". */ REG_OP(BNTrainingUpdateGrad) @@ -261,7 +285,10 @@ REG_OP(BNInferGrad) *@par Inputs: * Two inputs, including: \n -*@li x: A Tensor of type float16 or float32. Up to 8D. +*@li x: A Tensor. Must be one of the following types: +* float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, +* complex128, float16, uint32, uint64, complex64, complex128. *@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. *@par Attributes: @@ -270,6 +297,8 @@ REG_OP(BNInferGrad) *@par Outputs: *y: The reduced tensor. Has the same type and format as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Sum. */ REG_OP(ReduceSum) .INPUT(x, TensorType::NumberType()) @@ -292,6 +321,8 @@ REG_OP(ReduceSum) *@par Outputs: *y: The reduced tensor. Has the same type and format as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Sum. */ REG_OP(ReduceSumD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32})) @@ -305,8 +336,7 @@ REG_OP(ReduceSumD) *@par Inputs: *One input: -*x: A mutable Tensor. Must be one of the following types: float16, -* float32, double. Should be a Variable Tensor. +*x: The boolean tensor to reduce. *@par Attributes: *@li keep_dims: A bool. If true, retains reduced dimensions with length 1. 
@@ -315,6 +345,9 @@ REG_OP(ReduceSumD) *@par Outputs: *y: The reduced tensor. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReduceAll. */ REG_OP(ReduceAllD) .INPUT(x, TensorType({DT_BOOL})) @@ -328,7 +361,7 @@ REG_OP(ReduceAllD) *@par Inputs: *Two inputs, including: -*@li x: A mutable Tensor. Must be one of the following types: float16, float32, double. Should be a Variable Tensor. +*@li x: The boolean tensor to reduce. *@li axis: A mutable Tensor. The dimensions to reduce. If None, reduces all dimensions. Must be in the range [- rank (input_sensor), rank (input_sensor)). *@par Attributes: @@ -336,6 +369,9 @@ REG_OP(ReduceAllD) *@par Outputs: *y: The reduced tensor. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReduceAll. */ REG_OP(ReduceAll) .INPUT(x, TensorType({DT_BOOL})) @@ -358,6 +394,8 @@ REG_OP(ReduceAll) *@par Outputs: *y: A Tensor. Has the same type and format as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReduceProd. */ REG_OP(ReduceProd) .INPUT(x,TensorType::NumberType()) @@ -383,6 +421,8 @@ REG_OP(ReduceProd) *@attention Constraints: * "keep_dims" is in the range [-rank(input_tensor), rank(input_tensor)]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReduceProd. */ REG_OP(ReduceProdD) .INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16})) @@ -407,6 +447,9 @@ REG_OP(ReduceProdD) * - If false, the rank of the tensor is reduced by 1 for each entry in axis. *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator ReduceMean. */ REG_OP(ReduceMean) .INPUT(x, TensorType::NumberType()) @@ -431,6 +474,9 @@ REG_OP(ReduceMean) * - If false, the rank of the tensor is reduced by 1 for each entry in axis. *@par Outputs: *y: A Tensor. Has the same type as "x". 
+ +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator ReduceMean. */ REG_OP(ReduceMeanD) .INPUT(x, TensorType({DT_FLOAT16, DT_INT32, DT_FLOAT, DT_INT8, DT_UINT8})) @@ -456,6 +502,8 @@ REG_OP(ReduceMeanD) *@attention Constraints: * The value range of "axes" is [-dims, dims - 1]. "dims" indicates the dimension length of "x". +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Max. */ REG_OP(ReduceMax) .INPUT(x, TensorType::NumberType()) @@ -480,6 +528,9 @@ REG_OP(ReduceMax) *@attention Constraints: * The value range of "axis" is [-dims, dims - 1]. "dims" indicates the dimension length of "x". + +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Max. */ REG_OP(ReduceMaxD) .INPUT(x, TensorType({DT_FLOAT, DT_UINT8, DT_INT8, @@ -506,6 +557,8 @@ REG_OP(ReduceMaxD) *@attention Constraints:\n * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)). +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator reduce_min. */ REG_OP(ReduceMin) .INPUT(x, TensorType::NumberType()) @@ -530,6 +583,8 @@ REG_OP(ReduceMin) *@attention Constraints:\n * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)). +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator reduce_min. */ REG_OP(ReduceMinD) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) @@ -538,7 +593,7 @@ REG_OP(ReduceMinD) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceMinD) /** -*@brief Computes the "logical or" of elements across dimensions of a tensor.\n +*@brief Computes the "logical or" of elements across dimensions of a tensor. * Reduces "x" along the dimensions given in "axes". * Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each * entry in "axes". 
If "keep_dims" is true, the reduced dimensions @@ -547,7 +602,7 @@ REG_OP(ReduceMinD) * If "axes" is None, all dimensions are reduced, and a * tensor with a single element is returned. * -*@attention Constraints:\n +*@attention Constraints: * Only support bool * *@par Inputs: @@ -561,6 +616,9 @@ REG_OP(ReduceMinD) *@par Outputs: * y: The reduced tensor * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator reduce_any. +* */ REG_OP(ReduceAny) .INPUT(x, TensorType({DT_BOOL})) @@ -569,7 +627,7 @@ REG_OP(ReduceAny) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceAny) /** -*@brief Computes the "logical or" of elements across dimensions of a tensor.\n +*@brief Computes the "logical or" of elements across dimensions of a tensor. * Reduces "x" along the dimensions given in "axes". * Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each * entry in "axes". If "keep_dims" is true, the reduced dimensions @@ -578,20 +636,22 @@ REG_OP(ReduceAny) * If "axis" is None, all dimensions are reduced, and a * tensor with a single element is returned. * -*@attention Constraints:\n +*@attention Constraints: * Only support bool * *@par Inputs: * x: The boolean tensor to reduce. * *@par Attributes: -*@li axes: The dimensions to reduce. If "None" (default), reduces all -* dimensions. Must be in the range "[-rank(x), rank(x))". +*@li axes: The dimensions to reduce. Must be in the range "[-rank(x), rank(x))". *@li keep_dims: If true, retains reduced dimensions with length 1. * *@par Outputs: * y: The reduced tensor * +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator reduce_any. +* */ REG_OP(ReduceAnyD) .INPUT(x, TensorType({DT_BOOL})) @@ -622,6 +682,8 @@ REG_OP(ReduceAnyD) *y: A Tensor. Has the same type as "x". *@attention Constraints: The Reduction operator supports type float16 only on the device chip. +*@par Third-party framework compatibility +* Compatible with the Caffe operator Reduction. 
*/ REG_OP(Reduction) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -647,6 +709,8 @@ REG_OP(Reduction) *@attention Constraints:\n * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)). +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator EuclideanNorm. */ REG_OP(EuclideanNorm) .INPUT(x, TensorType::NumberType()) @@ -671,6 +735,8 @@ REG_OP(EuclideanNorm) *@attention Constraints:\n * If "axes = None", all dimensions will be reduced. "axes" must be in the range [-rank(input_shape), rank(input_shape)). +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator EuclideanNorm. */ REG_OP(EuclideanNormD) .INPUT(x, TensorType({DT_FLOAT, DT_INT32, DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/sdca_ops.h b/third_party/fwkacllib/inc/ops/sdca_ops.h index 15428d2b..2cbafc3c 100644 --- a/third_party/fwkacllib/inc/ops/sdca_ops.h +++ b/third_party/fwkacllib/inc/ops/sdca_ops.h @@ -54,6 +54,8 @@ namespace ge { *weights associated with a sparse feature group.a list of vectors where the values are the delta \n *weights associated with a dense feature group. +*@par Third-party framework compatibility +* Compatible with tensorflow SdcaOptimizerV2 operator. */ REG_OP(SdcaOptimizerV2) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index f3b588b1..95bcd039 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -35,6 +35,9 @@ namespace ge { *@par Outputs: *y: A 1D Tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Range. */ REG_OP(Range) .INPUT(start, TensorType({DT_FLOAT,DT_INT32,DT_DOUBLE,DT_INT64})) @@ -94,6 +97,9 @@ REG_OP(RangeD) *y: A Tensor. Has the same type as "x". *@see TileD() + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Tile. 
*/ REG_OP(Tile) .INPUT(x, TensorType::BasicType()) @@ -115,6 +121,9 @@ REG_OP(Tile) *y: A Tensor. Has the same type as "x". *@see Tile() + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Tile. */ REG_OP(TileD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -123,7 +132,7 @@ REG_OP(TileD) .OP_END_FACTORY_REG(TileD) /** -* @brief Gather slices from "x" into a tensor with shape specified by\n +* @brief Gather slices from "x" into a tensor with shape specified by * "indices". "indices" is an K-dimensional integer tensor, best thought of as a\n * (K-1)-dimensional tensor of "indices" into "params", where each element\n * defines a slice of "params":\n @@ -144,8 +153,12 @@ REG_OP(TileD) * @see GatherNd() * @attention Constraints: -* @li "x" is one of the following types: float16, float32, int32, int8, -* uint8. +* @li "x" is one of the following types: float16, float32, double, int32, +* uint8, int16, int8, complex64, int64, qint8, quint8, qint32, uint16, +* complex128, uint32, uint64. + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator GatherNd. */ REG_OP(GatherNd) .INPUT(x, TensorType::BasicType()) @@ -158,9 +171,9 @@ REG_OP(GatherNd) *@par Inputs: *Three inputs, including: -* @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n -* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n -* complex128, float16, uint32, uint64, complex64, complex128. +* @li x: A Tensor. Must be one of the following types: float32, float64, int32, +* uint8, int16, int8, int64, qint8, quint8, qint32, qint16, quint16, +* uint16, complex128, float16, uint32, uint64, complex64, complex128. * @li indices: A Tensor of type int32 or int64. * @li axis: A Tensor of type as int32. 
@@ -169,6 +182,10 @@ REG_OP(GatherNd) *@attention Constraints: *Value in indices must be in range [0, x.shape[axis]) + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator GatherV2. + */ REG_OP(GatherV2) .INPUT(x, TensorType::BasicType()) @@ -191,6 +208,12 @@ REG_OP(GatherV2) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@attention Constraints: + + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator GatherV2. */ REG_OP(GatherV2D) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT32, DT_INT8, DT_UINT8, @@ -236,6 +259,11 @@ REG_OP(GatherV2D) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@attention Constraints: + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSlice. */ REG_OP(StridedSlice) .INPUT(x, TensorType::BasicType()) @@ -283,6 +311,11 @@ REG_OP(StridedSlice) *@par Outputs: *y: A Tensor. Has the same type as "x". + +*@attention Constraints: + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSlice. */ REG_OP(StridedSliceD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, @@ -333,6 +366,11 @@ REG_OP(StridedSliceD) *@par Outputs: *output: A Tensor. Has the same type as "dy". + +*@attention Constraints: + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSliceGradD. */ REG_OP(StridedSliceGradD) .INPUT(dy, TensorType::BasicType()) @@ -349,41 +387,46 @@ REG_OP(StridedSliceGradD) .OP_END_FACTORY_REG(StridedSliceGradD) /** -*@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", \n - its gradient will have the same shape (which is passed here as "shape"). \n +*@brief Since StridedSlice cuts out pieces of its "input" which is size "dy", + its gradient will have the same shape (which is passed here as "shape"). The gradient will be zero in any element that the slice does not select. 
*@par Inputs: *Five inputs, including: * @li shape: A Tensor of type int32 or int64. -* @li begin: A Tensor of type int32 or int64. \n +* @li begin: A Tensor of type int32 or int64. The index of the first value to select. -* @li end: A Tensor of type int32 or int64. \n +* @li end: A Tensor of type int32 or int64. The index of the last value to select. * @li strides: A Tensor of type int32 or int64, for the increment. -* @li dy: A Tensor. Must be one of the following types: \n -* float32, float64, int32, uint8, int16, int8, \n -* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n +* @li dy: A Tensor. Must be one of the following types: +* float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, * complex128, float16, uint32, uint64, complex64, complex128. *@par Attributes: -* @li begin_mask: A Tensor of type int32. \n - A bitmask where a bit "i" being "1" means to ignore the begin \n +* @li begin_mask: A Tensor of type int32. + A bitmask where a bit "i" being "1" means to ignore the begin value and instead use the largest interval possible. -* @li end_mask: A Tensor of type int32. \n +* @li end_mask: A Tensor of type int32. Analogous to "begin_mask". -* @li ellipsis_mask: A Tensor of type int32. \n - A bitmask where bit "i" being "1" means the "i"th position \n +* @li ellipsis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th position is actually an ellipsis. -* @li new_axis_mask: A Tensor of type int32. \n - A bitmask where bit "i" being "1" means the "i"th \n +* @li new_axis_mask: A Tensor of type int32. + A bitmask where bit "i" being "1" means the "i"th specification creates a new shape 1 dimension. -* @li shrink_axis_mask: A Tensor of type int32. \n - A bitmask where bit "i" implies that the "i"th \n +* @li shrink_axis_mask: A Tensor of type int32. + A bitmask where bit "i" implies that the "i"th specification should shrink the dimensionality. 
*@par Outputs: *output: A Tensor has the same type as "dy". + +*@attention Constraints: + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator StridedSliceGrad. */ REG_OP(StridedSliceGrad) .INPUT(shape, TensorType::IndexNumberType()) @@ -410,7 +453,10 @@ REG_OP(StridedSliceGrad) * @li num_segments: A Tensor of type IndexNumberType. *@par Outputs: -*y: A Tensor of type RealNumberType. +*y: A Tensor of type NumberType. + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator UnsortedSegmentSum. */ REG_OP(UnsortedSegmentSum) .INPUT(x, TensorType::NumberType()) @@ -433,6 +479,9 @@ REG_OP(UnsortedSegmentSum) *@par Outputs: *y: A Tensor with same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator UnsortedSegmentSum. */ REG_OP(UnsortedSegmentSumD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_UINT8})) @@ -456,6 +505,9 @@ REG_OP(UnsortedSegmentSumD) *@attention Constraints: "axis" must be within the rank of "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReverseV2. */ REG_OP(ReverseV2) .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, @@ -472,17 +524,21 @@ REG_OP(ReverseV2) *@par Inputs: * One input: -*@li x: An ND Tensor (up to 8D). \n -*Must be one of the following types: int8, uint8, int16, uint16, int32, int64, bool, float32, float64 +*@li x: An ND Tensor (up to 8D). +* Must be one of the following types: int8, uint8, int16, uint16, int32, +* int64, bool, float16, float, double, complex64, complex128, string. *@par Attributes: -*axis: The indices of the dimensions to reverse. +*axis: The indices of the dimensions to reverse. Support type: listInt. *@par Outputs: *y: A Tensor. Has the same type and format as "x" *@attention Constraints: "axis" must be within the rank of "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ReverseV2. 
*/ REG_OP(ReverseV2D) .INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, @@ -500,11 +556,17 @@ REG_OP(ReverseV2D) *@par Inputs: * Three inputs, including: * @li condition: A Tensor of type bool. -* @li x1: A Tensor. Must be one of the following types: float16, float32, int32, int8, uint8. -* @li x2: A Tensor of the same type as "x1". +* @li x1: A Tensor. Must be one of the following types: float16, float32, + * int32, int8, uint8, int16, uint16, double, complex64, int64, complex128, + * half, qint8, quint8, qint16, quint16, qint32, quint32, uint32, uint64. + * format:ND +* @li x2: A Tensor of the same type as "x1". format:ND *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". format:ND + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Select. */ REG_OP(Select) .INPUT(condition, TensorType({DT_BOOL})) @@ -525,6 +587,8 @@ REG_OP(Select) *@par Outputs: *result: A Tensor. Has the same type as "then". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SelectV2. */ REG_OP(SelectV2) .INPUT(condition, TensorType({DT_BOOL})) @@ -548,6 +612,9 @@ REG_OP(SelectV2) *@par Outputs: *y:A Tensor with same type as "x". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SegmentMax. */ REG_OP(SegmentMax) .INPUT(x, TensorType::RealNumberType()) @@ -557,20 +624,24 @@ REG_OP(SegmentMax) /** *@brief: Computes the maximum along segments of a tensor. -*Computes a tensor such that output[i]=(data[i]) where max is over j such that segment_ids[j] == i. +*Computes a tensor such that output[i]=max(data[j]) where max is over j + * such that segment_ids[j] == i. *If the max is empty for a given segment ID i, output[i] = 0 *@par Inputs: *One inputs, include: -* @li x:A Tensor of type float16, float32, int32, int8,uint8 . +* @li x: A Tensor of type float16, float, int32.
format:ND *@par Attributes: * @li segment_ids:should be the size of the first dimension - must sorted and need not cover all values in the full range of valid values - must be positive intege + must be sorted and need not cover all values in + the full range of valid values; must be positive integers *@par Outputs: -*y:A Tensor with same type as "x". +*y:A Tensor with same type as "x". format:ND + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SegmentMax. */ REG_OP(SegmentMaxD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) @@ -596,6 +667,9 @@ REG_OP(SegmentMaxD) *@par Outputs: *y: A Tensor. Has the same type as "on_value". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator OneHot. */ REG_OP(OneHot) .INPUT(x, TensorType({DT_UINT8, DT_INT32, DT_INT64})) @@ -624,6 +698,9 @@ REG_OP(OneHot) *@par Outputs: *y: A Tensor. Has the same type as "on_value". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator OneHot. */ REG_OP(OneHotD) .INPUT(x, TensorType({DT_UINT8, DT_INT32})) @@ -637,16 +714,22 @@ REG_OP(OneHotD) .OP_END_FACTORY_REG(OneHotD) /** -*@brief Extracts a slice from a tensor.\n - This operation extracts a slice of size "size" from a tensor "x" starting at the location specified by "begin". +*@brief Extracts a slice from a tensor. +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin". *@par Inputs: -*@li x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. *@li offsets: A Tensor of type int32 or int64. The starting location for the slice. *@li size: A Tensor of type int32 or int64. The tensor shape.
*@par Outputs: *y: A Tensor. Has the same type as "x". The slice extracted from the tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Slice. */ REG_OP(Slice) .INPUT(x, TensorType::BasicType()) @@ -656,11 +739,14 @@ REG_OP(Slice) .OP_END_FACTORY_REG(Slice) /** -*@brief Extracts a slice from a tensor.\n - This operation extracts a slice of size "size" from a tensor "x" starting at the location specified by "begin". +*@brief Extracts a slice from a tensor. +* This operation extracts a slice of size "size" from a tensor "x" +* starting at the location specified by "begin". *@par Inputs: -*x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. +*@li x: A Tensor. Must be one of the following types: +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. *@par Attributes: *@li offsets: The starting location for the slice. @@ -707,6 +793,9 @@ REG_OP(SliceD) * @li Size of the last dimension =< 65500 * @li sorted = true * @li Don't support to get score on the platform of Ascend310 + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator TopK. */ REG_OP(TopKD) .INPUT(x, TensorType::RealNumberType()) @@ -742,6 +831,8 @@ REG_OP(TopKD) * @li indices: A Tensor of type int32, specifying the indices of sorted data. * @see TopK() +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator TopKV2. */ REG_OP(TopK) .INPUT(x, TensorType::RealNumberType()) @@ -764,6 +855,8 @@ REG_OP(TopK) *@attention Constraints:\n *@li "y" has the same shape as "shape". *@li "y" has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNd. 
*/ REG_OP(ScatterNd) .INPUT(indices, TensorType::BasicType()) @@ -772,20 +865,26 @@ REG_OP(ScatterNd) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(ScatterNd) /** -*@brief Creates a new tensor by applying sparse "updates" to individual values or slices within a tensor (initially zero for numeric, empty for string) of the given "shape" according to "indices". +*@brief Creates a new tensor by applying sparse "updates" to individual values + * or slices within a tensor (initially zero for numeric, empty for string) of + * the given "shape" according to "indices". *@par Inputs: -*Inputs including: \n -* @li indices: A required index tensor. Must be one of the following types: float32, float16, int32, int8, uint8. -* @li x: A required slice tensor. Must be one of the following types: float32, float16, int32, int8, uint8. +*Inputs including: +* @li indices: A required index tensor. Must be one of the following types: + * float, float16, int32, int16. format:ND. +* @li x: A required slice tensor. Must be one of the following types: + * float, float16, int32, int16. format:ND. *@par Attributes: * @li shape: A required list of int32, specifying the output shape. *@par Outputs: -*y: A Tensor. Has the same type as "updates". +*y: A Tensor. Has the same type as "x". format:ND. -*@attention Constraints:\n +*@attention Constraints: *@li "y" has the same shape as "shape". *@li "y" has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ScatterNd. */ REG_OP(ScatterNdD) .INPUT(indices, TensorType::IndexNumberType()) @@ -800,7 +899,7 @@ REG_OP(ScatterNdD) * @par Inputs: * Three inputs, including: * @li x1: A 2D Tensor of type float32. A "batch_size * classes" tensor. -* @li x2: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. +* @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids. 
* @par Attributes: * @li k: A required int32, specifying the number of top elements to look at for @@ -810,6 +909,9 @@ REG_OP(ScatterNdD) * y: A Tensor of type bool. * @see InTopK() + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator InTopK. */ REG_OP(InTopKD) .INPUT(x1, TensorType({DT_FLOAT})) @@ -830,6 +932,9 @@ REG_OP(InTopKD) * @par Outputs: * y: A Tensor of type bool. + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator InTopKV2. */ REG_OP(InTopK) .INPUT(x1, TensorType({DT_FLOAT})) @@ -845,6 +950,7 @@ REG_OP(InTopK) * "strides", etc. work exactly as in "StridedSlice". * @par Inputs: +* Five inputs, including: * @li var: A mutable ND Tensor of type BasicType. * @li begin: A mutable ND Tensor of type IndexNumberType. * Specifies the index of the first value to select. @@ -869,6 +975,9 @@ REG_OP(InTopK) * of "value" must be exactly the shape produced by the slice of "var". * @see StridedSlice() + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator StridedSlice. */ REG_OP(StridedSliceAssign) .INPUT(var, TensorType(BasicType)) @@ -931,15 +1040,15 @@ REG_OP(StridedSliceAssignD) .OP_END_FACTORY_REG(StridedSliceAssignD) /** -*@brief Gather slices from "params" according to "indices"."indices" must be \n +*@brief Gather slices from "params" according to "indices"."indices" must be an integer tensor of any dimension(usually 0-D or 1-D). \n Produces an output tensor with shape "indices.shape + params.shape[1:]". *@par Inputs: *Two inputs, including: * @li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, \n -* complex64, int64, qint8, quint8, qint32, qint16, quint16, uint16, \n -* complex128, float16, uint32, uint64, complex64, complex128. +* int64, qint8, quint8, qint32, qint16, quint16, uint16, \n +* float16, uint32, uint64, complex64, complex128. * @li indices: A Tensor of type int32 or int64. 
*@par Attributes: @@ -950,6 +1059,10 @@ REG_OP(StridedSliceAssignD) *@attention Constraints: * "indices" is in the range [0, x.shape[0]). + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Gather. + */ REG_OP(Gather) .INPUT(x, TensorType::BasicType()) @@ -963,15 +1076,19 @@ REG_OP(Gather) *@par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: int32, float32, float16, int8, uint8. -*@li axis A Tensor of type int32. Defaults to "0". +*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 +*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". * *@par Attributes: -*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive cumprod. +*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input +* is identical to the first element of the output. If "True", performs exclusive cumprod. *@li reverse: A bool. Defaults to "False". * *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumprod. */ REG_OP(Cumprod) .INPUT(x, TensorType::NumberType()) @@ -986,15 +1103,19 @@ REG_OP(Cumprod) *@par Inputs: * One input: -*x: A Tensor. Must be one of the following types: int32, float32, float16, int8, uint8. +*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64 * *@par Attributes: -*@li axis A Tensor of type int32. Defaults to "0". 
-*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive cumprod. +*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +*@li exclusive: If "False", performs inclusive cumprod, which means that the first element of the input +* is identical to the first element of the output. If "True", performs exclusive cumprod. *@li reverse: A bool. Defaults to "False". * *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumprod. */ REG_OP(CumprodD) .INPUT(x, TensorType::NumberType()) @@ -1009,15 +1130,19 @@ REG_OP(CumprodD) *@par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: int32, float32, float16, int8, uint8. -*@li axis A Tensor of type int32. Defaults to "0". +*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. +*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". * *@par Attributes: -*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive cumsum. +*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is +* identical to the first element of the output. If "True", performs exclusive cumsum. *@li reverse: A bool. Defaults to "False". * *@par Outputs: *@li y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumsum. */ REG_OP(Cumsum) .INPUT(x, TensorType::NumberType()) @@ -1032,15 +1157,19 @@ REG_OP(Cumsum) * *@par Inputs: * One input: -*x: A Tensor. 
Must be one of the following types: int32, float32, float16, int8, uint8. +*x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, +* complex64, int64, qint8, quint8, qint32, uint16, complex128, float16, uint32, uint64. * *@par Attributes: -*@li axis A Tensor of type int32. Defaults to "0". -*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive cumsum. +*@li axis A Tensor of type int32 or int64. Range is [-rank(x),rank(x)). Defaults to "0". +*@li exclusive: If "False", performs inclusive cumsum, which means that the first element of the input is +* identical to the first element of the output. If "True", performs exclusive cumsum. *@li reverse: A bool. Defaults to "False". * *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumsum. */ REG_OP(CumsumD) .INPUT(x, TensorType::NumberType()) @@ -1051,7 +1180,7 @@ REG_OP(CumsumD) .OP_END_FACTORY_REG(CumsumD) /** -*@brief Updates specified rows with values in v. \n +*@brief Updates specified rows with values in v. *Computes x[i, :] = v; return x. *@par Inputs: *Three inputs, including: @@ -1066,6 +1195,8 @@ REG_OP(CumsumD) *@par Outputs: *y: A Tensor of the same type as "x". \n * An alias of "x". The content of "y" is undefined if there are duplicates in indices. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceUpdate. */ REG_OP(InplaceUpdate) .INPUT(x, TensorType::BasicType()) @@ -1075,12 +1206,11 @@ REG_OP(InplaceUpdate) .OP_END_FACTORY_REG(InplaceUpdate) /** -*@brief Updates specified rows with values in v. \n +*@brief Updates specified rows with values in v. *Computes x[i, :] = v; return x. *@par Inputs: *Two inputs, including: -* @li x: A Tensor. \n -* TensorType::NumberType(). 
+* @li x: A Tensor of type int32, float16, float32. * @li v: A Tensor of the same type as "x". \n * Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices". @@ -1090,6 +1220,9 @@ REG_OP(InplaceUpdate) *@par Outputs: *y: A Tensor of the same type as "x". \n * An alias of "x". The content of "y" is undefined if there are duplicates in indices. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceUpdate. */ REG_OP(InplaceUpdateD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1099,7 +1232,7 @@ REG_OP(InplaceUpdateD) .OP_END_FACTORY_REG(InplaceUpdateD) /** -*@brief Adds "v" into specified rows of "x". \n +*@brief Adds "v" into specified rows of "x". *Computes y = x; y[i, :] += v. *@par Inputs: *Three inputs, including: @@ -1114,6 +1247,8 @@ REG_OP(InplaceUpdateD) *@par Outputs: *y: A Tensor of the same type as "x". \n * An alias of "x". The content of "y" is undefined if there are duplicates in indices. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceAdd. */ REG_OP(InplaceAdd) .INPUT(x, TensorType::BasicType()) @@ -1123,12 +1258,11 @@ REG_OP(InplaceAdd) .OP_END_FACTORY_REG(InplaceAdd) /** -*@brief Adds "v" into specified rows of "x". \n +*@brief Adds "v" into specified rows of "x". *Computes y = x; y[i, :] += v. *@par Inputs: *Two inputs, including: -* @li x: A Tensor. \n -* TensorType::NumberType(). +* @li x: A Tensor of type int32, float16, float32. * @li v: A Tensor of the same type as "x". \n * Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices". @@ -1138,6 +1272,9 @@ REG_OP(InplaceAdd) *@par Outputs: *y: A Tensor of the same type as "x". \n * An alias of "x". The content of "y" is undefined if there are duplicates in indices. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceAdd.
*/ REG_OP(InplaceAddD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1147,7 +1284,7 @@ REG_OP(InplaceAddD) .OP_END_FACTORY_REG(InplaceAddD) /** -*@brief Subtracts "v" into specified rows of "x". \n +*@brief Subtracts "v" into specified rows of "x". *Computes y = x; y[i, :] -= v; return y. *@par Inputs: **Three inputs, including: @@ -1159,6 +1296,9 @@ REG_OP(InplaceAddD) *@par Outputs: *y: A Tensor. Has the same type as "x".\n * An alias of "x". The content of "y" is undefined if there are duplicates in indices. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceSub. */ REG_OP(InplaceSub) .INPUT(x, TensorType::BasicType()) @@ -1168,12 +1308,12 @@ REG_OP(InplaceSub) .OP_END_FACTORY_REG(InplaceSub) /** -*@brief Subtracts "v" into specified rows of "x". \n +*@brief Subtracts "v" into specified rows of "x". *Computes y = x; y[i, :] -= v. *@par Inputs: **Two inputs, including: -* @li x: A Tensor. TensorType::NumberType(). +* @li x: A Tensor of type is int32, float16, float32. * @li v: A Tensor of the same type as "x". \n * Same dimension sizes as "x" except the first dimension, which must be the same as the size of "indices". @@ -1183,6 +1323,9 @@ REG_OP(InplaceSub) *@par Outputs: *y: A Tensor. Has the same type as "x".\n * An alias of x. The content of y is undefined if there are duplicates in indices. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator InplaceSub. */ REG_OP(InplaceSubD) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) @@ -1210,6 +1353,9 @@ REG_OP(InplaceSubD) * "updates". * @see ScatterNd(),ScatterNdAdd() + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator ScatterNDNonAliasingAdd. */ REG_OP(ScatterNonAliasingAdd) .INPUT(x, TensorType::NumberType()) @@ -1232,6 +1378,9 @@ REG_OP(ScatterNonAliasingAdd) * y: A Tensor of type RealNumberType. 
* @see UnsortedSegmentSum(), UnsortedSegmentProd(), + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator UnsortedSegmentMin. */ REG_OP(UnsortedSegmentMin) .INPUT(x, TensorType::RealNumberType()) @@ -1279,6 +1428,9 @@ REG_OP(UnsortedSegmentMinD) * y: A Tensor of type RealNumberType. * @see UnsortedSegmentSum(), UnsortedSegmentMin(), + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator UnsortedSegmentProd. */ REG_OP(UnsortedSegmentProd) .INPUT(x, TensorType::NumberType()) @@ -1332,8 +1484,10 @@ REG_OP(UnsortedSegmentProdD) *@li output_actual_rois_num: An optional bool. Defaults to "false". *@par Outputs: -*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". +*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". *@li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(Proposal) .INPUT(cls_prob, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1373,8 +1527,10 @@ REG_OP(UnsortedSegmentProdD) *@li output_actual_rois_num: An optional bool. Defaults to "false". *@par Outputs: -*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". 
+*@li rois: A Tensor with shape [batch, 5, post_nms_topn], of type float16 or float32, specifying the output box information. "post_nms_topn" must be a multiple of 16. The dimension "5" indicates (batchID, x1, y1, x2, y2). The number of BBoxes output per batch is determined by "actual_rois_num". *@li actual_rois_num: A Tensor with shape [batch, 8], of type int32, specifying the number of BBoxes output per batch. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(ProposalD) .INPUT(cls_prob, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -1400,17 +1556,19 @@ REG_OP(ProposalD) * If reverse=false: (N, H, W, C)->(N, H/stride, W/stride, C*(stride*stride)) *@par Inputs: -*x: An (N, H, W, C) tensor. All data types are supported. +*x: An (N, H, W, C) tensor. All types except double are supported. *@par Attributes: *@li stride: An optional int32, specifying the plane or channel scaling factor. Defaults to "2". *@li reverse: An optional bool, specifying the conversion mode. If "true", depth to space conversion is performed. If "false", space to depth conversion is performed. Defaults to "false". *@par Outputs: -*y: An (N, H, W, C) tensor. All data types are supported. +*y: An (N, H, W, C) tensor. All types except double are supported. *@attention Constraints: *@li If reverse=true: C/(stride*stride) yields an integer result. If reverse=false: W/stride and H/stride yield integer results. +*@par Third-party framework compatibility +* It is a custom operator. It has no corresponding operator in Caffe. */ REG_OP(PassThrough) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64})) @@ -1442,6 +1600,8 @@ REG_OP(PassThrough) *@li "axis" must be less than the rank of "x". *@li The "offset" for each dimension must not exceed the maximum value of the corresponding dimension of "x". 
*@li The array length of "offset" plus the value of "axis" equals to the rank of "y". +*@par Third-party framework compatibility +* Compatible with the Caffe operator Crop. */ REG_OP(Crop) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32,DT_INT64,DT_UINT64})) @@ -1472,6 +1632,8 @@ REG_OP(Crop) *@attention Constraints:\n *@li "axis" must be within the rank of the input tensor. *@li "tiles" must be greater than 1. +*@par Third-party framework compatibility +* Compatible with the Caffe operator Tile. */ REG_OP(TileWithAxis) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT64, DT_INT32, @@ -1574,6 +1736,8 @@ REG_OP(StridedWrite) * *@par Outputs: *@li y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumsum. */ REG_OP(CumulativeLogsumexp) .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) @@ -1597,6 +1761,8 @@ REG_OP(CumulativeLogsumexp) * *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Cumsum. */ REG_OP(CumulativeLogsumexpD) .INPUT(x, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/ops/set_ops.h b/third_party/fwkacllib/inc/ops/set_ops.h index dc9bc5c9..d9478380 100644 --- a/third_party/fwkacllib/inc/ops/set_ops.h +++ b/third_party/fwkacllib/inc/ops/set_ops.h @@ -42,6 +42,8 @@ namespace ge { *@attention Constraints:\n *-The implementation for DenseToDenseSetOperation on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow DenseToDenseSetOperation operator. 
*/ REG_OP(DenseToDenseSetOperation) .INPUT(x1, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ @@ -78,6 +80,8 @@ REG_OP(DenseToDenseSetOperation) *@attention Constraints:\n *-The implementation for DenseToSparseSetOperation on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow DenseToSparseSetOperation operator. */ REG_OP(DenseToSparseSetOperation) .INPUT(x1, TensorType({DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, \ @@ -119,6 +123,8 @@ REG_OP(DenseToSparseSetOperation) *@attention Constraints:\n *-The implementation for SparseToSparseSetOperation on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow SparseToSparseSetOperation operator. */ REG_OP(SparseToSparseSetOperation) .INPUT(x1_indices, TensorType({DT_INT64})) @@ -155,6 +161,8 @@ REG_OP(SparseToSparseSetOperation) *@attention Constraints:\n *-The implementation for SetSize on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow SetSize operator. */ REG_OP(SetSize) .INPUT(set_indices, TensorType({DT_INT64})) diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h index 87f0d81b..6b5600f7 100644 --- a/third_party/fwkacllib/inc/ops/sparse_ops.h +++ b/third_party/fwkacllib/inc/ops/sparse_ops.h @@ -33,6 +33,8 @@ namespace ge { *@par Outputs: *y: A vector Tensor. 1D. Has the same type as "values". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator SparseSoftmax. */ REG_OP(SparseSoftmax) .INPUT(indices, TensorType({DT_INT64})) @@ -54,6 +56,8 @@ REG_OP(SparseSoftmax) *@par Outputs: *y: A matrix Tensor. Has the same type and same shape as "x2". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseTensorDenseAdd. 
*/ REG_OP(SparseTensorDenseAdd) @@ -79,6 +83,8 @@ REG_OP(SparseTensorDenseAdd) *@li y_indices: The indices of the SparseTensor. Has the same type as "indices". *@li y_values: The values of the SparseTensorr. Has the same type as "values". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReorder. */ REG_OP(SparseReorder) .INPUT(indices, TensorType({DT_INT64})) @@ -104,6 +110,8 @@ REG_OP(SparseReorder) *@li y_indices: A Tensor of type int64. The indices of the new dense shape. *@li y_shape: A Tensor of type int64. The shape of the new dense shape. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReshape. */ REG_OP(SparseReshape) .INPUT(indices, TensorType({DT_INT64})) @@ -126,6 +134,8 @@ REG_OP(SparseReshape) *@par Outputs: *y: A Tensor. Has the same type as "x1_values". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseDenseCwiseAdd. */ REG_OP(SparseDenseCwiseAdd) .INPUT(x1_indices, TensorType({DT_INT64})) @@ -153,6 +163,8 @@ REG_OP(SparseDenseCwiseAdd) *@par Outputs: *y: A Tensor. Has the same type as "x1_values". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseDenseCwiseDiv. */ REG_OP(SparseDenseCwiseDiv) .INPUT(x1_indices, TensorType({DT_INT64})) @@ -180,6 +192,8 @@ REG_OP(SparseDenseCwiseDiv) *@par Outputs: *y: A Tensor. Has the same type as "x1_values". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseDenseCwiseMul. */ REG_OP(SparseDenseCwiseMul) .INPUT(x1_indices, TensorType({DT_INT64})) @@ -211,6 +225,8 @@ REG_OP(SparseDenseCwiseMul) *@par Outputs: *handle: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AddSparseToTensorsMap. 
*/ REG_OP(AddSparseToTensorsMap) .INPUT(indices, TensorType({DT_INT64})) @@ -235,6 +251,8 @@ REG_OP(AddSparseToTensorsMap) *@par Outputs: *y_grad: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseSliceGrad. */ REG_OP(SparseSliceGrad) .INPUT(backprop_val_grad, TensorType({ DT_INT8, DT_UINT8, DT_INT16, @@ -263,6 +281,8 @@ REG_OP(SparseSliceGrad) *y_values: A Tensor. Has the same type as "values". *y_values: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseSlice. */ REG_OP(SparseSlice) .INPUT(indices, TensorType({DT_INT64})) @@ -292,6 +312,8 @@ REG_OP(SparseSlice) *x1_val_grad: A Tensor. Has the same type as "backprop_val_grad". *x2_val_grad: A Tensor. Has the same type as "backprop_val_grad". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseAddGrad. */ REG_OP(SparseAddGrad) .INPUT(backprop_val_grad, TensorType({DT_INT8, DT_INT16, DT_INT32, @@ -316,6 +338,8 @@ REG_OP(SparseAddGrad) *@li y_value: A Tensor. Has the same type as "grad_values". *@li y_default_value: A Tensor. Has the same type as "grad_values". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseFillEmptyRowsGrad. */ REG_OP(SparseFillEmptyRowsGrad) .INPUT(reverse_index_map, TensorType({DT_INT64})) @@ -349,6 +373,8 @@ REG_OP(SparseFillEmptyRowsGrad) *@li adjoint_b: An optional bool. Defaults to "False".Use the adjoint of B in the matrix multiply. *@li If B is complex, this is transpose(conj(B)). Otherwise it is transpose(B). +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseTensorDenseMatMul. */ REG_OP(SparseTensorDenseMatMul) .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) @@ -376,6 +402,8 @@ REG_OP(SparseTensorDenseMatMul) *@par Outputs: *y: A Tensor. Has the same type as "values". 
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseToDense. */ REG_OP(SparseToDense) .INPUT(indices, TensorType({DT_INT32, DT_INT64})) @@ -411,6 +439,8 @@ REG_OP(SparseToDense) * @li y_values:A `Tensor`. Has the same type as `values`. * @li y_shape:A `Tensor` of type `int64`. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseConcat. */ REG_OP(SparseConcat) .DYNAMIC_INPUT(indices, TensorType({DT_INT64})) @@ -452,6 +482,8 @@ REG_OP(SparseConcat) * @li sum_values:A `Tensor`. Has the same type as `x1_values`. * @li sum_shape:A `Tensor` of type `int64`. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseAdd. */ REG_OP(SparseAdd) .INPUT(x1_indices, TensorType({DT_INT64})) @@ -488,6 +520,8 @@ REG_OP(SparseAdd) * @li empty_row_indicator:A `Tensor` of type `bool`. * @li reverse_index_map:A `Tensor` of type `int64`. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseFillEmptyRows. */ REG_OP(SparseFillEmptyRows) .INPUT(indices, TensorType({DT_INT64})) @@ -527,6 +561,8 @@ REG_OP(SparseFillEmptyRows) * @li y_indices:A `Tensor` of type `int64`. * @li y_values:A `Tensor`. Has the same type as `x1_values`. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseSparseMaximum. */ REG_OP(SparseSparseMaximum) .INPUT(x1_indices, TensorType({DT_INT64})) @@ -561,6 +597,8 @@ REG_OP(SparseSparseMaximum) * @li y_indices:A `Tensor` of type `int64`. * @li y_values:A `Tensor`. Has the same type as `x1_values`. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseSparseMinimum. */ REG_OP(SparseSparseMinimum) .INPUT(x1_indices, TensorType({DT_INT64})) @@ -600,6 +638,8 @@ REG_OP(SparseSparseMinimum) *@par Outputs: * y:A `Tensor`. Has the same type as `input_values`.
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReduceMax. */ REG_OP(SparseReduceMax) .INPUT(x_indices, TensorType({DT_INT64})) @@ -635,6 +675,8 @@ REG_OP(SparseReduceMax) * @li y_values:A `Tensor`. Has the same type as `input_values`. * @li y_shape:A `Tensor` of type `int64`. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReduceMaxSparse. */ REG_OP(SparseReduceMaxSparse) .INPUT(x_indices, TensorType({DT_INT64})) @@ -672,6 +714,8 @@ REG_OP(SparseReduceMaxSparse) * @li y_values: A Tensor. Has the same type as "input_values". * @li y_shape: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReduceSum. */ REG_OP(SparseReduceSum) .INPUT(x_indices, TensorType({DT_INT64})) @@ -709,6 +753,8 @@ REG_OP(SparseReduceSum) * @li y_values: A Tensor. Has the same type as "input_values". * @li y_shape: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseReduceSumSparse. */ REG_OP(SparseReduceSumSparse) .INPUT(x_indices, TensorType({DT_INT64})) @@ -745,6 +791,8 @@ REG_OP(SparseReduceSumSparse) * @li y_values: A list of "num_split" Tensor objects with the same type as "values". * @li y_shape: A list of "num_split" Tensor objects of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseSplit. */ REG_OP(SparseSplit) .INPUT(split_dim, TensorType({DT_INT64})) @@ -790,6 +838,8 @@ REG_OP(SparseSplit) * @li output_values: A Tensor of type "out_type". * @li output_shape: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SparseCross. */ REG_OP(SparseCross) .DYNAMIC_INPUT(indices, TensorType({DT_INT64})) @@ -826,6 +876,8 @@ REG_OP(SparseCross) *@par Outputs: * handles: A Tensor of type int64.
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator AddManySparseToTensorsMap. */ REG_OP(AddManySparseToTensorsMap) .INPUT(indices, TensorType({DT_INT64})) @@ -847,6 +899,7 @@ REG_OP(AddManySparseToTensorsMap) * The "N" serialized SparseTensor objects. *@par Attributes: +* @li dtype: A tf.DType. The "dtype" of the SparseTensor objects stored in the "SparseTensorsMap". * @li container: An optional string. Defaults to "". \n *The container name for the "SparseTensorsMap" read by this op. * @li shared_name: An optional string. Defaults to "". \n @@ -857,6 +910,8 @@ REG_OP(AddManySparseToTensorsMap) * @li values: A Tensor of type "dtype". * @li shape: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator TakeManySparseFromTensorsMap. */ REG_OP(TakeManySparseFromTensorsMap) .INPUT(handles, TensorType({DT_INT64})) @@ -884,6 +939,8 @@ REG_OP(TakeManySparseFromTensorsMap) *@par Outputs: * serialized_sparse: A Tensor of type "out_type". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SerializeSparse. */ REG_OP(SerializeSparse) .INPUT(indices, TensorType({DT_INT64})) @@ -910,6 +967,8 @@ REG_OP(SerializeSparse) *@par Outputs: * serialized_sparse: A Tensor of type "out_type". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SerializeManySparse. */ REG_OP(SerializeManySparse) .INPUT(indices, TensorType({DT_INT64})) @@ -937,6 +996,8 @@ REG_OP(SerializeManySparse) * @li values: A Tensor of type "dtype". * @li shape: A Tensor of type int64. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DeserializeSparse. */ REG_OP(DeserializeSparse) .INPUT(serialized_sparse, TensorType({DT_STRING})) @@ -964,6 +1025,8 @@ REG_OP(DeserializeSparse) * @li values: A Tensor of type "dtype". * @li shape: A Tensor of type int64. 
+*@par Third-party framework compatibility +* Compatible with the TensorFlow operator DeserializeManySparse. */ REG_OP(DeserializeManySparse) .INPUT(serialized_sparse, TensorType({DT_STRING})) diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h index c74bebe9..53b3e848 100644 --- a/third_party/fwkacllib/inc/ops/spectral_ops.h +++ b/third_party/fwkacllib/inc/ops/spectral_ops.h @@ -34,6 +34,8 @@ namespace ge { dimension of `input` is replaced with the `fft_length / 2 + 1` unique \n frequency components of its 1D Fourier transform. +*@par Third-party framework compatibility +* Compatible with TensorFlow RFFT operator. */ REG_OP(RFFT) .INPUT(input, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h index 521d05f7..700d34b7 100644 --- a/third_party/fwkacllib/inc/ops/split_combination_ops.h +++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h @@ -24,7 +24,7 @@ namespace ge { *@par Inputs: * Two inputs, including: -*@li x: An ND Tensor. \n +*@li x: An ND Tensor. *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 *@li split_dim: Must be the following type:int32. Specifies the dimension along which to split. @@ -39,6 +39,8 @@ namespace ge { *@li "num_split" is divisible by the size of dimension "split_dim". *@li "split_dim" is in the range [-len(x.shape), (x.shape)-1]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Split. */ REG_OP(Split) .INPUT(split_dim, TensorType({DT_INT32})) @@ -67,6 +69,8 @@ REG_OP(Split) *@li "num_split" is divisible by the size of dimension "split_dim". *@li "split_dim" is in the range [-len(x.shape), (x.shape)-1]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Split. 
*/ REG_OP(SplitD) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, @@ -83,7 +87,7 @@ REG_OP(SplitD) *@par Inputs: * Three inputs, including: *@li x: An ND Tensor. \n -*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 +*Must be one of the following types: *@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. *@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split. @@ -98,6 +102,8 @@ REG_OP(SplitD) *@li "size_splits" and "num_split" have the same length. *@li The elements in "size_splits" sum to the size of dimension "split_dim". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SplitV. */ REG_OP(SplitV) .INPUT(x, TensorType::BasicType()) @@ -126,7 +132,11 @@ REG_OP(SplitV) *@attention Constraints: *@li Each element in "size_splits" is greater than or equal to 1. *@li "size_splits" and "num_split" have the same length. +*Under the Caffe framework, "slice_point" is converted from cut points to cut segments and mapped to "size_splits". *@li The elements in "size_splits" sum to the size of dimension "split_dim". +*Under the Caffe framework, "size_splits" or "axis" is converted to "split_dim"; only one of them takes effect. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SplitV. */ REG_OP(SplitVD) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_UINT8, @@ -156,6 +166,8 @@ REG_OP(SplitVD) *@par Outputs: *output_data: The concatenated tensor with same type as "values". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator ParallelConcat.
*/ REG_OP(ParallelConcat) .DYNAMIC_INPUT(values, TensorType({DT_FLOAT,DT_FLOAT16,DT_INT8,DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64})) @@ -169,7 +181,7 @@ REG_OP(ParallelConcat) *@par Inputs: * One input: -*x: Dynamic input.An NC1HWC0 or ND Tensor. \n +*x: Dynamic input.An NC1HWC0 or ND Tensor. *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 *@par Attributes: @@ -182,6 +194,8 @@ REG_OP(ParallelConcat) *@li "x" is a list of at least 2 "tensor" objects of the same type. *@li "concat_dim" is in the range [-len(x.shape), len(x.shape)]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ConcatV2. */ REG_OP(ConcatV2D) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_INT64, DT_UINT64, DT_UINT32, DT_INT16, DT_UINT16, DT_UINT8})) @@ -195,9 +209,9 @@ REG_OP(ConcatV2D) *@par Inputs: * Two inputs, including: -*@li Dynamic input "x" is An NC1HWC0 or ND Tensor. \n +*@li Dynamic input "x" is An NC1HWC0 or ND Tensor. *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 -*@li concat_dim: An int8, int16, int32, or int64. Specifies the dimension along which to concatenate. +*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate. *@par Attributes: *N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x". No default value. @@ -208,6 +222,8 @@ REG_OP(ConcatV2D) *@attention Constraints: * "x" is a list of at least 2 "tensor" objects of the same type. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ConcatV2. */ REG_OP(ConcatV2) .DYNAMIC_INPUT(x, TensorType::BasicType()) @@ -221,8 +237,8 @@ REG_OP(ConcatV2) *@par Inputs: * One input: -*x:Dynamic input. An NC1HWC0 or ND Tensor. 
\n -*Must be one of the following types: \n float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 +*x:Dynamic input. An NC1HWC0 or ND Tensor. +*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 *@par Attributes: *@li concat_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to concatenate. No default value. @@ -235,6 +251,8 @@ REG_OP(ConcatV2) *@li "x" is a list of at least 2 "tensor" objects of the same type. *@li "concat_dim" is in the range [-len(x.shape), len(x.shape)]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Concat. */ REG_OP(ConcatD) .DYNAMIC_INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16,DT_INT8,DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64})) @@ -248,9 +266,9 @@ REG_OP(ConcatD) *@par Inputs: * Two inputs, including: -*@li x: Dynamic input.An NC1HWC0 or ND Tensor. \n +*@li x: Dynamic input.An NC1HWC0 or ND Tensor. *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 -*@li concat_dim: An int8, int16, int32, or int64. Specifies the dimension along which to concatenate. +*@li concat_dim: An int32, or int64. Specifies the dimension along which to concatenate. *@par Attributes: *N: An optional int8, int16, int32, or int64. Specifies the number of elements in "x". @@ -262,6 +280,8 @@ REG_OP(ConcatD) *@li "x" is a list of at least 2 "tensor" objects of the same type. *@li "concat_dim" is in the range [-len(x.shape), len(x.shape)]. +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator Concat. */ REG_OP(Concat) .DYNAMIC_INPUT(x, TensorType::BasicType()) @@ -286,6 +306,8 @@ REG_OP(Concat) *@par Outputs: *y: A Tensor. Has the same type as "x". +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Pack. 
*/ REG_OP(Pack) .DYNAMIC_INPUT(x, TensorType::BasicType()) @@ -303,11 +325,13 @@ REG_OP(Pack) * @li x: A list of 1D Tensor objects of type int32. *@par Attributes: -*@li Concat_dim: A required int. Must be within the rank of input "x". -*@li N: A required int. +*N: A required int. *@par Outputs: *y: A Tensor list with same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ConcatOffset. */ REG_OP(ConcatOffset) .INPUT(concat_dim, TensorType({DT_INT32})) @@ -330,6 +354,9 @@ REG_OP(ConcatOffset) *@par Outputs: *y: A Tensor list with same type as "x". + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator ConcatOffset. */ REG_OP(ConcatOffsetD) .DYNAMIC_INPUT(x, TensorType({DT_INT32})) diff --git a/third_party/fwkacllib/inc/ops/state_ops.h b/third_party/fwkacllib/inc/ops/state_ops.h index 2b2d1362..4e759688 100644 --- a/third_party/fwkacllib/inc/ops/state_ops.h +++ b/third_party/fwkacllib/inc/ops/state_ops.h @@ -36,6 +36,9 @@ The caller does not need to pass the value of the variable tensor. *@par Outputs: *y: The created variable tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Variable. */ REG_OP(Variable) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, \ @@ -59,6 +62,9 @@ pass the reference to the variable tensor to the matching DestroyTemporaryVariab *@par Outputs: *y: The created variable tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator TemporaryVariable. */ REG_OP(TemporaryVariable) .OUTPUT(y, TensorType::ALL()) @@ -80,6 +86,9 @@ Must be the same as the "var_name" attribute of the reference to the temporary v *@par Outputs: *y: Final value of the reference to the temporary variable tensor. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator DestroyTemporaryVariable.
*/ REG_OP(DestroyTemporaryVariable) .INPUT(x, TensorType::ALL()) @@ -95,6 +104,9 @@ REG_OP(DestroyTemporaryVariable) *@par Outputs: *y: A tensor, indicating whether "x" has been initialized. + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator IsVariableInitialized. */ REG_OP(IsVariableInitialized) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -111,6 +123,8 @@ REG_OP(IsVariableInitialized) *@par Outputs: *y: A tensor, indicating whether "x" has been initialized, and the data type is boolean. +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator VarIsInitializedOp. */ REG_OP(VarIsInitializedOp) .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT16, DT_UINT8, @@ -135,6 +149,8 @@ REG_OP(VarIsInitializedOp) *@attention Constraints:\n *-The implementation for CountUpTo on Ascend uses AICPU, with bad performance.\n +*@par Third-party framework compatibility +*@li compatible with tensorflow CountUpTo operator. */ REG_OP(CountUpTo) .INPUT(ref, TensorType({DT_INT32, DT_INT64})) diff --git a/third_party/fwkacllib/inc/ops/stateful_random_ops.h b/third_party/fwkacllib/inc/ops/stateful_random_ops.h index 9ba09dd6..eb3db1cc 100644 --- a/third_party/fwkacllib/inc/ops/stateful_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateful_random_ops.h @@ -33,6 +33,8 @@ namespace ge { *@par Outputs: *y:A Returns Non-deterministic integer values with specified shape. +*@par Third-party framework compatibility +*Compatible with tensorflow NonDeterministicInts operator. */ REG_OP(NonDeterministicInts) @@ -55,6 +57,8 @@ REG_OP(NonDeterministicInts) *@par Outputs: *y:A Returns the created operation. +*@par Third-party framework compatibility +* Compatible with tensorflow RngSkip operator. */ REG_OP(RngSkip) @@ -82,6 +86,8 @@ smaller than the range of the output (either `2^32` or `2^64`). *@par Outputs: *y:A Returns Random values with specified shape. 
+*@par Third-party framework compatibility +* Compatible with tensorflow StatefulRandomBinomial operator. */ REG_OP(StatefulRandomBinomial) @@ -106,6 +112,8 @@ REG_OP(StatefulRandomBinomial) *@par Outputs: *y:A Returns A tensor of the specified shape filled with random normal values. +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulStandardNormalV2 operator. */ REG_OP(StatefulStandardNormalV2) @@ -129,6 +137,8 @@ REG_OP(StatefulStandardNormalV2) *@par Outputs: *y:A Returns Random values with specified shape. +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulTruncatedNormal operator. */ REG_OP(StatefulTruncatedNormal) @@ -151,6 +161,8 @@ lower bound 0 is included in the range, while the upper bound 1 is excluded. \n *@par Outputs: *y:A Returns Random values with specified shape. +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulUniform operator. */ REG_OP(StatefulUniform) @@ -172,6 +184,8 @@ The generated values are uniform integers covering the whole range of `dtype`. *@par Outputs: *y:A Returns Random values with specified shape. +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulUniformFullInt operator. */ REG_OP(StatefulUniformFullInt) @@ -200,6 +214,8 @@ smaller than the range of the output (either `2^32` or `2^64`). *@par Outputs: *y:A Returns Random values with specified shape. +*@par Third-party framework compatibility +* Compatible with tensorflow StatefulUniformInt operator. 
*/ REG_OP(StatefulUniformInt) diff --git a/third_party/fwkacllib/inc/ops/stateless_random_ops.h b/third_party/fwkacllib/inc/ops/stateless_random_ops.h index bb8e015a..03fc824a 100644 --- a/third_party/fwkacllib/inc/ops/stateless_random_ops.h +++ b/third_party/fwkacllib/inc/ops/stateless_random_ops.h @@ -40,6 +40,8 @@ include: \n *@see StatelessMultinomial() +*@par Third-party framework compatibility +*compatible with StatelessMultinomial op of tensorflow */ REG_OP(StatelessMultinomial) .INPUT(logits, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE})) @@ -61,6 +63,8 @@ REG_OP(StatelessMultinomial) *@par Outputs: *y: Returns Random values with specified shape. +*@par Third-party framework compatibility +* Compatible with TensorFlow StatelessRandomUniformInt operator. */ REG_OP(StatelessRandomUniformInt) diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h index 0b4701b2..d085a868 100644 --- a/third_party/fwkacllib/inc/ops/string_ops.h +++ b/third_party/fwkacllib/inc/ops/string_ops.h @@ -42,6 +42,8 @@ include: \n *@see StringSplit() +*@par Third-party framework compatibility +*compatible with StringSplit op of tensorflow */ REG_OP(StringSplit) .INPUT(input, TensorType({DT_STRING})) @@ -72,6 +74,8 @@ include: \n *@see StringSplitV2() +*@par Third-party framework compatibility +*compatible with StringSplitV2 op of tensorflow */ REG_OP(StringSplitV2) .INPUT(input, TensorType({DT_STRING})) @@ -102,6 +106,8 @@ include: \n *@see UnicodeScript() +*@par Third-party framework compatibility +*compatible with UnicodeScript op of tensorflow */ REG_OP(UnicodeScript) .INPUT(x, TensorType({DT_INT32})) @@ -127,9 +133,12 @@ include: \n *inputs are trusted or unimportant. There is a risk of adversaries\n *constructing inputs that all hash to the same bucket.\n *To prevent this problem, use a strong hash function with\n +*tf.string_to_hash_bucket_strong. 
*@see Substr() +*@par Third-party framework compatibility +*compatible with Substr op of tensorflow */ REG_OP(Substr) .INPUT(input, TensorType({DT_STRING})) @@ -154,9 +163,12 @@ include: \n *This function may be used when CPU time is scarce and inputs are trusted or\n *unimportant. There is a risk of adversaries constructing inputs that all hash\n *to the same bucket. To prevent this problem, use a strong hash function with\n +*tf.string_to_hash_bucket_strong. *@see StringToHashBucketFast() +*@par Third-party framework compatibility +*compatible with StringToHashBucketFast op of tensorflow */ REG_OP(StringToHashBucketFast) .INPUT(x, TensorType({DT_STRING})) @@ -185,9 +197,12 @@ include: \n * hash value distribution over buckets. This requires that the hash function\ *is seeded by a high-entropy (random) "key" unknown to the adversary. *@li The additional robustness comes at a cost of roughly 4x higher\n +*compute time than tf.string_to_hash_bucket_fast. *@see StringToHashBucketStrong() +*@par Third-party framework compatibility +*compatible with StringToHashBucketStrong op of tensorflow */ REG_OP(StringToHashBucketStrong) .INPUT(x, TensorType({DT_STRING})) @@ -211,6 +226,8 @@ include: \n *@see StringToHashBucket() +*@par Third-party framework compatibility +*compatible with StringToHashBucket op of tensorflow */ REG_OP(StringToHashBucket) .INPUT(string_tensor, TensorType({DT_STRING})) @@ -230,6 +247,8 @@ include: \n *@see StringStrip() +*@par Third-party framework compatibility +*compatible with StringStrip op of tensorflow */ REG_OP(StringStrip) .INPUT(x, TensorType({DT_STRING})) @@ -256,6 +275,8 @@ include: \n *@see StringLength() +*@par Third-party framework compatibility +*compatible with StringLength op of tensorflow */ REG_OP(StringLength) .INPUT(x, TensorType({DT_STRING})) @@ -286,6 +307,8 @@ include: \n *@see StringJoin() +*@par Third-party framework compatibility +*compatible with StringJoin op of tensorflow */ REG_OP(StringJoin) .DYNAMIC_INPUT(x, 
TensorType({DT_STRING})) @@ -316,6 +339,8 @@ include: \n *@see StringFormat() +*@par Third-party framework compatibility +* compatible with StringFormat op of tensorflow */ REG_OP(StringFormat) .DYNAMIC_INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ @@ -345,6 +370,8 @@ include: \n *@see RegexFullMatch() +*@par Third-party framework compatibility +*compatible with RegexFullMatch op of tensorflow */ REG_OP(RegexFullMatch) .INPUT(x, TensorType({DT_STRING})) @@ -375,6 +402,8 @@ include: \n *@see RegexReplace() +*@par Third-party framework compatibility +*compatible with RegexReplace op of tensorflow */ REG_OP(RegexReplace) .INPUT(x, TensorType({DT_STRING})) @@ -408,6 +437,8 @@ include: \n *@see AsString() +*@par Third-party framework compatibility +*compatible with AsString op of tensorflow */ REG_OP(AsString) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ @@ -444,6 +475,8 @@ include: \n *@see EncodeBase64() +*@par Third-party framework compatibility +*compatible with EncodeBase64 op of tensorflow */ REG_OP(EncodeBase64) .INPUT(x, TensorType({DT_STRING})) @@ -465,6 +498,8 @@ include: \n *@see DecodeBase64() +*@par Third-party framework compatibility +*compatible with DecodeBase64 op of tensorflow */ REG_OP(DecodeBase64) .INPUT(x, TensorType({DT_STRING})) diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index eb8655d0..a8258eb9 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -59,11 +59,15 @@ REG_OP(TransposeD) The returned tensor's dimension i will correspond to the input dimension perm[i]. *@par Inputs: +*Two inputs, including: *@li x: A Tensor. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64. *@li perm: A Tensor of type int32 or int64. A permutation of the dimensions of "x". *@par Outputs: *y: A Tensor. 
Has the same type as "x". + +*@par Third-party framework compatibility +*Compatible with the TensorFlow operator Transpose. */ REG_OP(Transpose) .INPUT(x, TensorType::BasicType()) @@ -91,16 +95,21 @@ REG_OP(Permute) .OP_END_FACTORY_REG(Permute) /** -*@brief Flattens the inputs. Reserves axis 0 and flattens the input tensors along axis 1. +*@brief Flattens the inputs. Reserves axis 0 and flattens the input tensors +* along axis 1. *@par Inputs: *One input: \n -*x: A multi-dimensional Tensor. Must be one of the following types: \n -int8, uint8, int16, uint16, int32, int64, float16, float32, float64. +*x: A multi-dimensional Tensor. Must be one of the following types: +* int8, uint8, int16, uint16, int32, uint32, int64,uint64, float16, float32. *@par Outputs: -*y: A 2D flattened Tensor (Reserves axis 0 and flattens the input tensors along axis 1). Must be one of the following data types: int8, uint8, int16, uint16, int32, int64, float16, float32, float64. +*y: A 2D flattened Tensor (Reserves axis 0 and flattens the input tensors +* along axis 1). Must be one of the following data types: int8, uint8, int16, +* uint16, int32, uint32, int64,uint64, float16, float32. +*@par Third-party framework compatibility +* Compatible with TensorFlow operator Flatten. */ REG_OP(Flatten) .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, @@ -115,14 +124,17 @@ REG_OP(Flatten) *@brief Permutes and crops the input tensor. *@par Inputs: -* Three inputs, including: \n -*@li x: A 5D Tensor of type float16 or float32, with format NC1HWC0. +* Three inputs, including: +*@li x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0. *@li block_shape: A 1D list or tuple of int32 or int64. -*@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to crop from start and end dimensions after permutation. +*@li crops: A 2D list or tuple of int32 or int64. Specifies the amount to +*crop from start and end dimensions after permutation. 
*@par Outputs: *y: A Tensor with format NC1HWC0. Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchToSpaceND. */ REG_OP(BatchToSpaceND) .INPUT(x, TensorType::BasicType()) @@ -135,17 +147,20 @@ REG_OP(BatchToSpaceND) *@brief Permutes and crops the input tensor. *@par Inputs: -* One input: \n -*x: A 5D Tensor of type float16 or float32, with format NC1HWC0. +* One input: +*x: A 5D Tensor of type float16 or int8 or uint8, with format NC1HWC0. *@par Attributes: *@li block_shape: A required 1D list or tuple of int32 or int64. -*@li crops: A required 2D list or tuple of int32 or int64. Specifies the amount to crop from the start and end dimensions after permutation. +*@li crops: A required 2D list or tuple of int32 or int64. Specifies the amount to crop +* from the start and end dimensions after permutation. *@par Outputs: *y: A Tensor with format NC1HWC0. Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchToSpaceND. */ REG_OP(BatchToSpaceNDD) .INPUT(x, TensorType::BasicType()) @@ -166,6 +181,8 @@ REG_OP(BatchToSpaceNDD) *@par Outputs: *y: A Tensor with format NC1HWC0. Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToBatchND. */ REG_OP(SpaceToBatchND) .INPUT(x, TensorType::BasicType()) @@ -188,6 +205,8 @@ REG_OP(SpaceToBatchND) *@par Outputs: *y: A Tensor with format NC1HWC0. Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToBatchND. */ REG_OP(SpaceToBatchNDD) .INPUT(x, TensorType::BasicType()) @@ -197,19 +216,24 @@ REG_OP(SpaceToBatchNDD) .OP_END_FACTORY_REG(SpaceToBatchNDD) /** -*@brief Outputs a copy of the input tensor where values from the "height" and "width" dimensions are moved to the "depth" dimension. 
+*@brief Outputs a copy of the input tensor where values from the "height" and +* "width" dimensions are moved to the "depth" dimension. *@par Inputs: *x: An NHWC Tensor. Must be one of the following types: -* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. *@par Attributes: *@li block_size: A required int, specifying the input block size. -*@li data_format: An optional string from "NHWC" and "NCHW" +*@li data_format: An optional string, specifying the data format. Defaults to +* "NHWC". *@par Outputs: *y: A Tensor. Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToDepth. */ REG_OP(SpaceToDepth) .INPUT(x, TensorType::BasicType()) @@ -233,6 +257,9 @@ REG_OP(SpaceToDepth) *@par Outputs: *y: A Tensor of the same type as "x". + +*@par Third-party framework compatibility: +* Compatible with TensorFlow operator DepthToSpace. */ REG_OP(DepthToSpace) .INPUT(x, TensorType::BasicType()) @@ -245,23 +272,27 @@ REG_OP(DepthToSpace) *@brief Permutes data into spatial data blocks and then prunes them. *@par Inputs: -*x: A 4D Tensor with format NC1HWC0. \n +*@li x: A 4D Tensor with format NC1HWC0. +*@li crops: A 1D list or tuple of int32 or int64. *Must be one of the following types: float16, float32 *@par Attributes: -*@li crops: A required list of int8, int16, int32, or int64. No default value. -*@li block_size: A required int8, int16, int32, or int64. No default value. +*block_size: A required int8, int16, int32, or int64. No default value. *@par Outputs: -*y: A 4D Tensor with format NC1HWC0, \n +*y: A 4D Tensor with format NC1HWC0, * of type float16 or float32. 
*@attention Constraints: *@li The size of the first dimension of input "x" must be divisible by (block_size * block_size). -*@li "crops" is a 2D tensor of non-negative integers with shape (2, 2). -*@li block_size >= 2 +*@li "crops" is a 4Dshape [batch, height, width, depth], height = height_pad - crop_top - crop_bottom, +*width = width_pad - crop_left - crop_right. +*@li block_size > 2 + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchToSpace. */ REG_OP(BatchToSpace) .INPUT(x, TensorType::BasicType()) @@ -275,13 +306,15 @@ REG_OP(BatchToSpace) *@par Inputs: * One input: -*x: An Tensor of shape [batch*block_size*block_size, height_pad/block_size, width_pad/block_size, depth].\n +*x: An Tensor of shape [batch*block_size*block_size, height_pad/block_size, width_pad/block_size, depth]. *The batch size of the input tensor must be divisible by (block size * block size). +*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, +*int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. *@par Attributes: *@li block_size: Must be one of the following types: `int32`, `int64`. -*@li crops: An Tensor. Must be one of the following types: int32, Int64.\n -*2D tensor with non negative integer of shape [2, 2]. It specifies how many\n +*@li crops: An Tensor. Must be one of the following types: int32, Int64. +*2D tensor with non negative integer of shape [2, 2]. It specifies how many *elements are clipped from the intermediate result of spatial dimension. *@par Outputs: @@ -290,7 +323,11 @@ REG_OP(BatchToSpace) *@attention Constraints: *@li The size of the first dimension of input "x" must be divisible by (block_size * block_size). *@li "crops" is a 2D tensor of non-negative integers with shape (2, 2). -*@li block_size >= 2 +*@li block_size > 2 + + +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator BatchToSpace. 
*/ REG_OP(BatchToSpaceD) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, @@ -304,12 +341,14 @@ REG_OP(BatchToSpaceD) .OP_END_FACTORY_REG(BatchToSpaceD) /** -*@brief Outputs a copy of the input tensor where values from the "height" and "width" dimensions are padded and rearranged to the "batch" dimension. +*@brief Outputs a copy of the input tensor where values from the "height" and +* "width" dimensions are padded and rearranged to the "batch" dimension. *@par Inputs: +* Two inputs, including: *@li x: An NC1HWC0 Tensor. Must be one of the following types: -* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. - +* float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, +* int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. *@li paddings: A 2D tensor of type int, specifying the input. *@par Attributes: @@ -317,6 +356,8 @@ REG_OP(BatchToSpaceD) *@par Outputs: *y: A Tensor. Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToBatch. */ REG_OP(SpaceToBatch) .INPUT(x, TensorType::BasicType()) @@ -338,6 +379,8 @@ REG_OP(SpaceToBatch) *@par Outputs: *y: A Tensor. Has the same type as input "x". +*@par Third-party framework compatibility +* Compatible with the TensorFlow operator SpaceToBatch. */ REG_OP(SpaceToBatchD) .INPUT(x, TensorType::BasicType()) @@ -351,7 +394,7 @@ REG_OP(SpaceToBatchD) * tensors. * @par Inputs: -* @ x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0. +* x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0. * @par Attributes: * @li num: An optional int, specifying the number of tensors to be unpacked to. @@ -366,6 +409,9 @@ REG_OP(SpaceToBatchD) * @li If "num" is not specified, it is inferred from the shape of "x".
* @li For the ND format, "axis" is in the range [-R, R); For the NC1HWC0 format, * "axis" must not be 2, 3, -2, or -3. + +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator Unpack. */ REG_OP(Unpack) .INPUT(x, TensorType::BasicType()) @@ -379,7 +425,9 @@ REG_OP(Unpack) * dimension of the output. * @par Inputs: -* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth]. +* x: A 4D Tensor with shape [batch, in_rows, in_cols, depth], Must be one of the +* following types:float32, double, int32, uint8, int16, int8, int64, uint16, +* float16, uint32, uint64 * @par Attributes: * @li ksizes: A required list or tuple. The size of the sliding window for each @@ -395,13 +443,15 @@ REG_OP(Unpack) * @li padding: A required string. The type of padding algorithm to use. * @par Outputs: -* Output: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *\n +* y: A 4D Tensor with shape [batch, out_rows, out_cols, ksize_rows *\n * ksize_cols * depth] containing image patches with size ksize_rows x ksize_cols\n * x depth vectorized in the "depth" dimension. Note "out_rows" and "out_cols"\n * are the dimensions of the output patches. * @attention Constraints: * "ksizes", "strides" and "rates" are lists of integers. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ExtractImagePatches. */ REG_OP(ExtractImagePatches) .INPUT(x, TensorType::RealNumberType()) @@ -434,6 +484,8 @@ REG_OP(ExtractImagePatches) * @attention Constraints: * "ksizes" and "strides" are lists of integers. +* @par Third-party framework compatibility +* Compatible with the TensorFlow operator ExtractVolumePatches. */ REG_OP(ExtractVolumePatches) .INPUT(x, TensorType::REALNUMBERTYPE()) @@ -504,6 +556,8 @@ REG_OP(ConfusionTranspose) *@attention Constraints: * "axis" and "end_axis" must be within the dimension range of the input. This operator cannot be directly called by the acllopExecute API. 
+*@par Third-party framework compatibility +* Compatible with the Caffe operator Flatten. */ REG_OP(FlattenV2) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h index 9a214955..137309b2 100644 --- a/third_party/fwkacllib/inc/register/op_registry.h +++ b/third_party/fwkacllib/inc/register/op_registry.h @@ -67,6 +67,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { const std::vector &GetRemoveInputConfigure(const std::string &ori_optype) const; + bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string &om_type); + private: std::unordered_map> op_ori_optype_map_; std::unordered_map op_run_mode_map_; diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 868e16ce..49c9de6a 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -114,8 +114,16 @@ typedef enum tagRtLimitType { RT_LIMIT_TYPE_LOW_POWER_TIMEOUT = 0, // timeout for power down , ms } rtLimitType_t; +typedef struct rtExceptionInfo { + uint32_t taskid; + uint32_t streamid; + uint32_t tid; +} rtExceptionInfo; + typedef void (*rtErrorCallback)(rtExceptionType); +typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); + /** * @ingroup dvrt_base * @brief stream handle. @@ -192,6 +200,14 @@ RTS_API rtError_t rtSetPollingMode(); */ RTS_API rtError_t rtSetExceptCallback(rtErrorCallback callback); +/** + * @ingroup dvrt_base + * @brief register callback for task failure + * @param [in] callback callback invoked when a task fails + * @return RT_ERROR_NONE for ok + */ +RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback); + /** * @ingroup dvrt_base * @brief notify handle.
diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 27ee26d2..7c2a0728 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -82,6 +82,7 @@ typedef enum tagRtMemcpyKind { RT_MEMCPY_MANAGED, // managed memory RT_MEMCPY_ADDR_DEVICE_TO_DEVICE, RT_MEMCPY_HOST_TO_DEVICE_EX, // host to device ex (only used for 8 bytes) + RT_MEMCPY_DEVICE_TO_HOST_EX, // device to host ex RT_MEMCPY_RESERVED, } rtMemcpyKind_t; diff --git a/third_party/fwkacllib/inc/tdt/data_common.h b/third_party/fwkacllib/inc/tdt/data_common.h index 687d31d2..7b1d631b 100644 --- a/third_party/fwkacllib/inc/tdt/data_common.h +++ b/third_party/fwkacllib/inc/tdt/data_common.h @@ -28,7 +28,7 @@ namespace tdt { */ enum TdtDataType { TDT_IMAGE_LABEL = 0, /**< Image label*/ - TDT_T_R, + TDT_TFRECORD, /**< TF Record*/ TDT_DATA_LABEL, /**< Data label*/ TDT_END_OF_SEQUENCE, /**< End of Sequence*/ TDT_TENSOR, /**< Tensor*/