inference supports dynamic shape

Branch: pull/544/head
Author: lichun (4 years ago)
Parent: a973743a27
Commit: b8e82bb16e

@@ -88,4 +88,8 @@ void OpTilingManager::LoadSo() {
   }
 }
+
+OpTilingManager &OpTilingManager::GetInstance() {
+  static OpTilingManager instance;
+  return instance;
+}
 }  // namespace ge

@@ -25,6 +25,7 @@ using SoToHandleMap = std::map<std::string, void *>;
 class OpTilingManager {
  public:
   OpTilingManager() = default;
+  static OpTilingManager &GetInstance();
   ~OpTilingManager();
   void LoadSo();
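
The GetInstance() added above is the standard C++11 "Meyers singleton": a function-local static is initialized exactly once, and since C++11 that initialization is guaranteed thread-safe. A minimal standalone sketch of the same idiom (the class name here is illustrative, not from the GE codebase):

    #include <cstdio>

    class Registry {
     public:
      static Registry &GetInstance() {
        static Registry instance;  // constructed on first use, thread-safe since C++11
        return instance;
      }
      Registry(const Registry &) = delete;             // a singleton is not copyable
      Registry &operator=(const Registry &) = delete;
      void LoadSo() { std::puts("loading libraries"); }

     private:
      Registry() = default;  // only GetInstance() can construct
    };

    int main() {
      Registry::GetInstance().LoadSo();  // every caller sees the same instance
      return 0;
    }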

@@ -72,7 +72,89 @@ set(SRC_LIST
     "../single_op/task/tbe_task_builder.cc"
     "../single_op/task/aicpu_task_builder.cc"
     "../single_op/task/aicpu_kernel_task_builder.cc"
-    "../hybrid/hybrid_davinci_model_stub.cc"
+    "../hybrid/common/tensor_value.cc"
+    "../hybrid/common/npu_memory_allocator.cc"
+    "../hybrid/executor/rt_callback_manager.cc"
+    "../hybrid/executor/node_state.cc"
+    "../hybrid/executor/node_done_manager.cc"
+    "../hybrid/executor/hybrid_profiler.cc"
+    "../hybrid/executor/hybrid_model_executor.cc"
+    "../hybrid/executor/hybrid_model_async_executor.cc"
+    "../hybrid/executor/hybrid_execution_context.cc"
+    "../hybrid/executor/subgraph_context.cc"
+    "../hybrid/executor/subgraph_executor.cc"
+    "../hybrid/executor/worker/task_compile_engine.cc"
+    "../hybrid/executor/worker/shape_inference_engine.cc"
+    "../hybrid/executor/worker/execution_engine.cc"
+    "../hybrid/model/hybrid_model.cc"
+    "../hybrid/model/hybrid_model_builder.cc"
+    "../hybrid/model/node_item.cc"
+    "../hybrid/model/graph_item.cc"
+    "../hybrid/node_executor/aicore/aicore_node_executor.cc"
+    "../hybrid/node_executor/aicore/aicore_op_task.cc"
+    "../hybrid/node_executor/aicore/aicore_task_builder.cc"
+    "../hybrid/node_executor/aicpu/aicpu_node_executor.cc"
+    "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
+    "../hybrid/node_executor/ge_local/ge_local_node_executor.cc"
+    "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
+    "../hybrid/node_executor/host_cpu/kernel_factory.cc"
+    "../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
+    "../hybrid/node_executor/controlop/control_op_executor.cc"
+    "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
+    "../hybrid/node_executor/rts/rts_node_executor.cc"
+    "../hybrid/node_executor/node_executor.cc"
+    "../hybrid/node_executor/task_context.cc"
+    "../hybrid/hybrid_davinci_model.cc"
+    "../ge_local_engine/engine/host_cpu_engine.cc"
+    "../graph/common/omg_util.cc"
+    "../graph/manager/host_mem_manager.cc"
+    "../graph/build/memory/var_mem_assign_util.cc"
+    "../host_kernels/transpose_kernel.cc"
+    "../host_kernels/add_kernel.cc"
+    "../host_kernels/broadcast_args_kernel.cc"
+    "../host_kernels/broadcast_gradient_args_kernel.cc"
+    "../host_kernels/cast_kernel.cc"
+    "../host_kernels/concat_offset_kernel.cc"
+    "../host_kernels/concat_v2_kernel.cc"
+    "../host_kernels/dynamic_stitch_kernel.cc"
+    "../host_kernels/identity_kernel.cc"
+    "../host_kernels/empty_kernel.cc"
+    "../host_kernels/expanddims_kernel.cc"
+    "../host_kernels/fill_kernel.cc"
+    "../host_kernels/floordiv_kernel.cc"
+    "../host_kernels/floormod_kernel.cc"
+    "../host_kernels/gather_v2_kernel.cc"
+    "../host_kernels/greater_kernel.cc"
+    "../host_kernels/kernel_utils.cc"
+    "../host_kernels/maximum_kernel.cc"
+    "../host_kernels/mul_kernel.cc"
+    "../host_kernels/pack_kernel.cc"
+    "../host_kernels/permute_kernel.cc"
+    "../host_kernels/range_kernel.cc"
+    "../host_kernels/rank_kernel.cc"
+    "../host_kernels/reduce_prod_kernel.cc"
+    "../host_kernels/reshape_kernel.cc"
+    "../host_kernels/rsqrt_kernel.cc"
+    "../host_kernels/shape_kernel.cc"
+    "../host_kernels/shape_n_kernel.cc"
+    "../host_kernels/size_kernel.cc"
+    "../host_kernels/slice_d_kernel.cc"
+    "../host_kernels/slice_kernel.cc"
+    "../host_kernels/squeeze_kernel.cc"
+    "../host_kernels/unsqueeze_kernel.cc"
+    "../host_kernels/ssd_prior_box_kernel.cc"
+    "../host_kernels/strided_slice_kernel.cc"
+    "../host_kernels/sub_kernel.cc"
+    "../host_kernels/transdata_kernel.cc"
+    "../host_kernels/unpack_kernel.cc"
+    "../graph/passes/pass_utils.cc"
+    "../graph/common/bcast.cc"
+    "../common/fp16_t.cc"
+    "../common/formats/format_transfers/format_transfer_transpose.cc"
+    "../common/formats/utils/formats_trans_utils.cc"
 )

 ######## libge_executor.a ########

@@ -39,6 +39,8 @@
 #include "graph/manager/graph_var_manager.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
+#include "graph/opsproto_manager.h"
+#include "ge_local_engine/engine/host_cpu_engine.h"

 using std::string;
 using std::vector;
@@ -221,6 +223,33 @@ class ModelListenerAdapter : public ModelListener {
   std::shared_ptr<ge::ModelListener> listener;
 };

+static void InitOpsProtoManger() {
+  string opsproto_path;
+  const char *path_env = std::getenv("ASCEND_OPP_PATH");
+  if (path_env != nullptr) {
+    string path = path_env;
+    string file_path = RealPath(path.c_str());
+    if (file_path.empty()) {
+      GELOGE(FAILED, "File path %s is invalid.", path.c_str());
+      return;
+    }
+    opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
+    GELOGI("Get opsproto so path from env : %s", path.c_str());
+  } else {
+    string path_base = PluginManager::GetPath();
+    GELOGI("path_base is %s", path_base.c_str());
+    path_base = path_base.substr(0, path_base.rfind('/'));
+    path_base = path_base.substr(0, path_base.rfind('/') + 1);
+    opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
+  }
+
+  GELOGI("Get opsproto path is %s", opsproto_path.c_str());
+  OpsProtoManager *manager = OpsProtoManager::Instance();
+  map<string, string> option_tmp;
+  option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
+  (void)manager->Initialize(option_tmp);
+}
+
 GeExecutor::GeExecutor() {}

 Status GeExecutor::Initialize() {
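
The lookup order in InitOpsProtoManger above is: honor the ASCEND_OPP_PATH environment variable if set, otherwise derive a base directory from the plugin location; either way the result is a colon-separated pair of custom and built-in op_proto directories, custom first so it takes precedence. A self-contained sketch of that path-building logic (BuildOpsProtoPath and the fallback argument are illustrative names, not GE API):

    #include <cstdlib>
    #include <string>

    std::string BuildOpsProtoPath(const std::string &fallback_base) {
      const char *env = std::getenv("ASCEND_OPP_PATH");
      if (env != nullptr) {
        std::string p(env);
        // custom protos are listed first so they shadow built-ins
        return p + "/op_proto/custom/:" + p + "/op_proto/built-in/";
      }
      return fallback_base + "ops/op_proto/custom/:" + fallback_base + "ops/op_proto/built-in/";
    }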
@@ -230,6 +259,16 @@ Status GeExecutor::Initialize() {
     return ge::SUCCESS;
   }

+  OpTilingManager::GetInstance().LoadSo();
+
+  Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
+  if (initHostCpuEngineStatus != SUCCESS) {
+    GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
+    return initHostCpuEngineStatus;
+  }
+
+  InitOpsProtoManger();
+
   std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
   mem_type.push_back(RT_MEMORY_P2P_DDR);
   auto ret = MemManager::Instance().Initialize(mem_type);
@@ -600,11 +639,17 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }

+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    uint64_t session_id = hybrid_davinci_model->GetSessionId();
+    VarManagerPool::Instance().RemoveVarManager(session_id);
+  } else {
     std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
     if (davinci_model != nullptr) {
       uint64_t session_id = davinci_model->GetSessionId();
       VarManagerPool::Instance().RemoveVarManager(session_id);
     }
+  }

   ret = GraphLoader::UnloadModel(model_id);
   if (ret != SUCCESS) {
     GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
@@ -933,6 +978,26 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
  */
 Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
                              ge::RunModelData &run_output_data, bool async_mode) {
+  std::vector<GeTensorDesc> input_desc = {};
+  std::vector<GeTensorDesc> output_desc = {};
+  return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode);
+}
+
+/**
+ * @ingroup ge
+ * @brief Synchronous execution of offline model(Do not create thread)
+ * @param [in] uint32_t model_id: Model ID to execute
+ *             void* stream: stream to execute
+ *             const domi::InputData *input_data: Model input data
+ *             const std::vector<GeTensorDesc> &input_desc: Description of model input data
+ *             bool async_mode: is asynchronize mode
+ * @param [out] domi::OutputData *output_data: Model output data
+ * @param [out] std::vector<GeTensorDesc> &output_desc: Description of model output data
+ * @return SUCCESS handle successfully / others handle failed
+ */
+Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
+                             const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
+                             std::vector<GeTensorDesc> &output_desc, bool async_mode) {
   if (!isInit_) {
     GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
     return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -957,7 +1022,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
     }
   }

-  return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data);
+  return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
 }

 /**
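
Note the compatibility pattern used here: the original four-argument ExecModel is kept and now simply forwards to the new overload with empty descriptor vectors, so existing callers compile unchanged while new callers can pass tensor descriptions and receive the dynamically inferred output shapes. The same pattern in miniature (all names below are illustrative, not GE API):

    #include <cstdint>
    #include <vector>

    struct TensorDesc { /* shape, format, ... */ };
    using Status = int32_t;

    // New, richer entry point: reports output descriptions to the caller.
    Status Exec(uint32_t model_id, const std::vector<TensorDesc> &input_desc,
                std::vector<TensorDesc> &output_desc) {
      (void)model_id; (void)input_desc;
      output_desc.clear();  // a real executor would publish inferred shapes here
      return 0;
    }

    // Legacy entry point: kept for source compatibility, forwards with empty descs.
    Status Exec(uint32_t model_id) {
      std::vector<TensorDesc> input_desc;
      std::vector<TensorDesc> output_desc;
      return Exec(model_id, input_desc, output_desc);
    }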

@@ -61,9 +61,91 @@ local_ge_executor_src_files := \
     ../single_op/task/tbe_task_builder.cc \
     ../single_op/task/aicpu_task_builder.cc \
     ../single_op/task/aicpu_kernel_task_builder.cc \
-    ../hybrid/hybrid_davinci_model_stub.cc\
     ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \
     ../graph/common/local_context.cc \
+    ../hybrid/common/tensor_value.cc \
+    ../hybrid/common/npu_memory_allocator.cc \
+    ../hybrid/executor/rt_callback_manager.cc \
+    ../hybrid/executor/node_state.cc \
+    ../hybrid/executor/node_done_manager.cc \
+    ../hybrid/executor/hybrid_profiler.cc \
+    ../hybrid/executor/hybrid_model_executor.cc \
+    ../hybrid/executor/hybrid_model_async_executor.cc \
+    ../hybrid/executor/hybrid_execution_context.cc \
+    ../hybrid/executor/subgraph_context.cc \
+    ../hybrid/executor/subgraph_executor.cc \
+    ../hybrid/executor/worker/task_compile_engine.cc \
+    ../hybrid/executor/worker/shape_inference_engine.cc \
+    ../hybrid/executor/worker/execution_engine.cc \
+    ../hybrid/model/hybrid_model.cc \
+    ../hybrid/model/hybrid_model_builder.cc \
+    ../hybrid/model/node_item.cc \
+    ../hybrid/model/graph_item.cc \
+    ../hybrid/node_executor/aicore/aicore_node_executor.cc \
+    ../hybrid/node_executor/aicore/aicore_op_task.cc \
+    ../hybrid/node_executor/aicore/aicore_task_builder.cc \
+    ../hybrid/node_executor/aicpu/aicpu_node_executor.cc \
+    ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc \
+    ../hybrid/node_executor/ge_local/ge_local_node_executor.cc \
+    ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc \
+    ../hybrid/node_executor/host_cpu/kernel_factory.cc \
+    ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc \
+    ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \
+    ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \
+    ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \
+    ../hybrid/node_executor/controlop/control_op_executor.cc \
+    ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
+    ../hybrid/node_executor/rts/rts_node_executor.cc \
+    ../hybrid/node_executor/node_executor.cc \
+    ../hybrid/node_executor/task_context.cc \
+    ../hybrid/hybrid_davinci_model.cc \
+    ../ge_local_engine/engine/host_cpu_engine.cc \
+    ../graph/common/omg_util.cc \
+    ../graph/manager/host_mem_manager.cc \
+    ../graph/build/memory/var_mem_assign_util.cc \
+    ../host_kernels/transpose_kernel.cc \
+    ../host_kernels/add_kernel.cc \
+    ../host_kernels/broadcast_args_kernel.cc \
+    ../host_kernels/broadcast_gradient_args_kernel.cc \
+    ../host_kernels/cast_kernel.cc \
+    ../host_kernels/concat_offset_kernel.cc \
+    ../host_kernels/concat_v2_kernel.cc \
+    ../host_kernels/dynamic_stitch_kernel.cc \
+    ../host_kernels/identity_kernel.cc \
+    ../host_kernels/empty_kernel.cc \
+    ../host_kernels/expanddims_kernel.cc \
+    ../host_kernels/fill_kernel.cc \
+    ../host_kernels/floordiv_kernel.cc \
+    ../host_kernels/floormod_kernel.cc \
+    ../host_kernels/gather_v2_kernel.cc \
+    ../host_kernels/greater_kernel.cc \
+    ../host_kernels/kernel_utils.cc \
+    ../host_kernels/maximum_kernel.cc \
+    ../host_kernels/mul_kernel.cc \
+    ../host_kernels/pack_kernel.cc \
+    ../host_kernels/permute_kernel.cc \
+    ../host_kernels/range_kernel.cc \
+    ../host_kernels/rank_kernel.cc \
+    ../host_kernels/reduce_prod_kernel.cc \
+    ../host_kernels/reshape_kernel.cc \
+    ../host_kernels/rsqrt_kernel.cc \
+    ../host_kernels/shape_kernel.cc \
+    ../host_kernels/shape_n_kernel.cc \
+    ../host_kernels/size_kernel.cc \
+    ../host_kernels/slice_d_kernel.cc \
+    ../host_kernels/slice_kernel.cc \
+    ../host_kernels/squeeze_kernel.cc \
+    ../host_kernels/unsqueeze_kernel.cc \
+    ../host_kernels/ssd_prior_box_kernel.cc \
+    ../host_kernels/strided_slice_kernel.cc \
+    ../host_kernels/sub_kernel.cc \
+    ../host_kernels/transdata_kernel.cc \
+    ../host_kernels/unpack_kernel.cc \
+    ../graph/passes/pass_utils.cc \
+    ../graph/common/bcast.cc \
+    ../common/fp16_t.cc \
+    ../common/formats/format_transfers/format_transfer_transpose.cc \
+    ../common/formats/utils/formats_trans_utils.cc \

 local_ge_executor_c_include := \
     proto/insert_op.proto \

@@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES
 )

 ############ libge_local_opskernel_builder.a ############
-add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
+add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
 target_compile_options(ge_local_opskernel_builder_static PRIVATE
     -Werror

@@ -95,8 +95,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) {
 void HostCpuEngine::CloseSo() {
   for (auto handle : lib_handles_) {
-    if (dlclose(handle) != 0) {
-      GELOGW("failed to close handle, message: %s", dlerror());
+    if (mmDlclose(handle) != 0) {
+      GELOGW("failed to close handle, message: %s", mmDlerror());
     }
   }
   lib_handles_.clear();
@@ -322,13 +322,13 @@ Status HostCpuEngine::LoadLibs(std::vector<std::string> &lib_paths) {
 Status HostCpuEngine::LoadLib(const std::string &lib_path) {
   GELOGI("To invoke dlopen on lib: %s", lib_path.c_str());
-  auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
+  auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
   if (handle == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), dlerror());
+    GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), mmDlerror());
     return INTERNAL_ERROR;
   }
-  auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize");
+  auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize");
   if (initialize != nullptr) {
     GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str());
     if (initialize(HostCpuContext()) != SUCCESS) {
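
The direct dlopen/dlsym/dlclose/dlerror calls are swapped here for mmDlopen/mmDlsym/mmDlclose/mmDlerror from the mmpa portability layer, presumably so the same code builds on non-POSIX targets. On Linux such wrappers are typically thin pass-throughs; a sketch of what that shim plausibly looks like (the My-prefixed names are illustrative, not the real mmpa implementation):

    #include <dlfcn.h>  // POSIX dynamic loading; link with -ldl

    #define MY_RTLD_NOW    RTLD_NOW
    #define MY_RTLD_GLOBAL RTLD_GLOBAL

    inline void *MyDlopen(const char *path, int mode) { return dlopen(path, mode); }
    inline void *MyDlsym(void *handle, const char *name) { return dlsym(handle, name); }
    inline int MyDlclose(void *handle) { return dlclose(handle); }
    inline char *MyDlerror() { return dlerror(); }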

@@ -20,7 +20,7 @@
 #include "framework/common/ge_inner_error_codes.h"
 #include "graph/node.h"
 #include "graph/operator.h"
-#include "register/register.h"
+#include "external/../register/register.h"

 namespace ge {
 class HostCpuEngine {

@@ -30,6 +30,7 @@
 #include "model/ge_model.h"
 #include "graph/ge_context.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
+#include "graph/utils/op_desc_utils.h"

 using domi::BuildMode;
@@ -311,6 +312,53 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt
   return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
 }

+static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
+                               const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
+  GE_CHECK_NOTNULL(out_anchor);
+  NodePtr in_node = out_anchor->GetOwnerNode();
+  GE_CHECK_NOTNULL(in_node);
+  OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC);
+  OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0))
+                                     .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0))
+                                     .Build();
+  (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
+  if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
+    GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
+static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
+  for (auto &node : graph->GetDirectNode()) {
+    // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
+    auto op_desc = node->GetOpDesc();
+    if (op_desc == nullptr) {
+      continue;
+    }
+    auto op_type = op_desc->GetType();
+    if (op_type == NETOUTPUT) {
+      for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
+        const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
+        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
+        NodePtr in_node = peer_out_anchor->GetOwnerNode();
+        GE_CHECK_NOTNULL(in_node);
+        std::string in_node_op_type = in_node->GetType();
+        if (in_node_op_type == CONSTANT) {
+          GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
+          std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
+          if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
+            GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str());
+            return FAILED;
+          }
+        }
+      }
+    }
+  }
+  return SUCCESS;
+}
+
 Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
                                                std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
                                                GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
@@ -332,6 +380,9 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
     if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
       continue;
     }
+
+    GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");
+
     if (sub_graph->GetGraphUnknownFlag()) {
       // unknown shape build flow
       GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),
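
The motivation for GenerateTaskForConstant, per the comment in the code itself: a CONSTANT node emits no task of its own, so when it feeds NETOUTPUT directly nothing is scheduled to populate the output buffer. Inserting a MEMCPYADDRASYNC node gives that edge a real task. Schematically:

    // before:  Const ──────────────────────────────▶ NetOutput   (no task emitted)
    // after:   Const ──▶ <name>_Memcpy (MemcpyAddrAsync) ──▶ NetOutput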

@@ -274,13 +274,16 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
 /// @param [in] stream stream to execute model on
 /// @param [in] async_mode is asynchronize mode.
 /// @param [in] input_data model input data
+/// @param [in] input_desc description of model input data
 /// @param [out] output_data model output data
+/// @param [out] output_desc description of model output data
 ///
 Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                                 OutputData &output_data) {
+                                 const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                                 std::vector<GeTensorDesc> &output_desc) {
   auto model_manager = ModelManager::GetInstance();
   GE_CHECK_NOTNULL(model_manager);
-  Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data);
+  Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
   if (ret != SUCCESS) {
     GELOGE(ret, "Execute model failed, model_id:%u.", model_id);
     return ret;

@@ -65,7 +65,8 @@ class GraphLoader {
                                const std::vector<uint32_t> &output_queue_ids);

   static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                             OutputData &output_data);
+                             const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                             std::vector<GeTensorDesc> &output_desc);

   static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);

@@ -117,7 +117,8 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
       load_end_time_(0),
       time_info_(),
       dataInputTid(0),
-      is_model_has_inited_(false),
+      is_weight_mem_has_inited_(false),
+      is_feature_map_mem_has_inited_(false),
       model_id_(0),
       runtime_model_id_(0),
       version_(0),
@@ -263,34 +264,65 @@ void DavinciModel::Shrink() {
   ge_model_.reset();  // delete object.
 }

-Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
-  if (is_model_has_inited_) {
-    GELOGE(FAILED, "call InitModelMem more than once .");
+Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) {
+  if (is_weight_mem_has_inited_) {
+    GELOGE(FAILED, "call InitWeightMem more than once.");
     return FAILED;
   }
-  is_model_has_inited_ = true;
-  std::size_t data_size = TotalMemSize();
-  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
+  is_weight_mem_has_inited_ = true;
+
   const Buffer &weights = ge_model_->GetWeight();
   std::size_t weights_size = weights.GetSize();
   GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE);

-  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
-    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
+  if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
+    GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
     return FAILED;
   }

-  if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
-    GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
+  weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
+  is_inner_weight_base_ = false;
+
+  if (weights_size != 0) {
+    weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
+    is_inner_weight_base_ = false;
+    if (weight_ptr == nullptr) {
+      weights_mem_base_ = MallocWeightsMem(weights_size);
+      if (weights_mem_base_ == nullptr) {
+        GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
+        return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
+      }
+      is_inner_weight_base_ = true;
+    }
+    GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+           weights_mem_base_, weights_size);
+    GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
+    GELOGI("copy weights data to device");
+  }
+
+  runtime_param_.weight_base = weights_mem_base_;
+  return SUCCESS;
+}
+
+Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
+  if (is_feature_map_mem_has_inited_) {
+    GELOGE(FAILED, "call InitFeatureMapMem more than once .");
+    return FAILED;
+  }
+  is_feature_map_mem_has_inited_ = true;
+
+  std::size_t data_size = TotalMemSize();
+  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
+
+  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
+    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
     return FAILED;
   }

   mem_base_ = static_cast<uint8_t *>(dev_ptr);
   p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr);
-  weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
   is_inner_mem_base_ = false;
-  is_inner_weight_base_ = false;

   if (TotalMemSize() && mem_base_ == nullptr) {
     mem_base_ = MallocFeatureMapMem(data_size);
@@ -298,13 +330,15 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
       GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
       return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
     }
-    GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+    GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
             mem_base_, data_size);
-    weights_mem_base_ = mem_base_;
-    is_inner_mem_base_ = true;
+    if (!is_inner_weight_base_) {
+      weights_mem_base_ = mem_base_;
       is_inner_weight_base_ = true;
     }
+    is_inner_mem_base_ = true;
+  }

   if (p2p_data_size != 0) {
     p2p_mem_base_ = MallocP2PMem(p2p_data_size);
@@ -312,27 +346,11 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
       GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
       return GE_EXEC_ALLOC_P2P_MEM_FAILED;
     }
-    GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+    GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
            p2p_mem_base_, p2p_data_size);
     is_inner_p2p_mem_base_ = true;
   }

-  if (weights_size != 0) {
-    weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
-    is_inner_weight_base_ = false;
-    if (weight_ptr == nullptr) {
-      weights_mem_base_ = MallocWeightsMem(weights_size);
-      if (weights_mem_base_ == nullptr) {
-        GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
-        return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
-      }
-      is_inner_weight_base_ = true;
-    }
-    GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
-           weights_mem_base_, weights_size);
-    GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
-  }
-
   GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
   runtime_param_.mem_base = mem_base_;
   runtime_param_.weight_base = weights_mem_base_;
@@ -642,8 +660,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
   GE_TIMESTAMP_START(InitModelMem);
   GELOGD("Known node is %d", known_node_);
+  GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size));
   if (!known_node_) {
-    GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size));
+    GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size));
     data_inputer_ = new (std::nothrow) DataInputer();
     GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
   }
@@ -1140,6 +1159,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
     GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS,
                     GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;);
   }
+
   return SUCCESS;
 }

@@ -584,7 +584,8 @@ class DavinciModel {
   Status SyncVarData();

-  Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize);
+  Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);
+  Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);

   void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);
@@ -850,7 +851,9 @@ class DavinciModel {
   Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node);
   Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc);

-  bool is_model_has_inited_;
+  bool is_weight_mem_has_inited_;
+  bool is_feature_map_mem_has_inited_;
+
   uint32_t model_id_;
   uint32_t runtime_model_id_;
   string name_;

@@ -31,6 +31,7 @@
 #include "model/ge_root_model.h"
 #include "graph/common/local_context.h"
 #include "common/formats/utils/formats_trans_utils.h"
+#include "hybrid/hybrid_davinci_model.h"

 namespace ge {
 thread_local uint32_t device_count = 0;
@@ -204,6 +205,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
 ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
   std::lock_guard<std::mutex> lock(map_mutex_);
+  auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
+  if (hybrid_davinci_model != hybrid_model_map_.end()) {
+    uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
+    DestroyAicpuSession(session_id);
+    return SUCCESS;
+  }
+
   auto it = model_map_.find(model_id);
   if (it == model_map_.end()) {
     GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
@@ -925,6 +933,12 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
                                             vector<InputOutputDescInfo> &output_desc,
                                             std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats,
                                             bool new_model_desc) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->SetModelDescVersion(new_model_desc);
+    return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats);
+  }
+
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
                          "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
@@ -943,6 +957,11 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
 ///
 Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
                                          int32_t &dynamic_type) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type);
+  }
+
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                          "GetDynamicBatchInfo failed, Invalid model id %u!", model_id);
@@ -975,6 +994,12 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vect
 ///
 Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
                                                 std::vector<std::string> &user_input_shape_order) {
+  auto hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order);
+    return SUCCESS;
+  }
+
   auto davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                          "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id)
@@ -990,6 +1015,12 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &
 }

 Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info);
+    return SUCCESS;
+  }
+
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHECK_NOTNULL(davinci_model);
   davinci_model->GetModelAttr(dynamic_output_shape_info);
@@ -1201,10 +1232,25 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
 /// @param [in] stream model stream
 /// @param [in] async_mode is asynchronize mode.
 /// @param [in] input_data input data
+/// @param [in] input_desc description of input data
 /// @param [out] output_data output data
+/// @param [out] output_desc description of output data
 ///
 Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                                  OutputData &output_data) {
+                                  const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                                  std::vector<GeTensorDesc> &output_desc) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    auto inputs = input_data.blobs;
+    auto outputs = output_data.blobs;
+    Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream);
+    if (status == SUCCESS) {
+      GELOGI("Execute model %u success.", model_id);
+    }
+    return status;
+  }
+
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id);

@@ -148,10 +148,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
   /// @param [in] stream model stream
   /// @param [in] async_mode is asynchronize mode.
   /// @param [in] input_data model input data
+  /// @param [in] input_desc description of model input data
   /// @param [out] output_data model output data
+  /// @param [out] output_desc description of model output data
   ///
   ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                          OutputData &output_data);
+                          const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                          std::vector<GeTensorDesc> &output_desc);

   ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs);

@@ -26,6 +26,7 @@
 #include <vector>
 #include "common/ge/ge_util.h"
 #include "framework/common/debug/ge_log.h"
+#include "framework/common/debug/log.h"
 #include "framework/common/types.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/utils/graph_utils.h"
@@ -72,7 +73,7 @@ Status DynamicShapePartitioner::Partition() {
   }
   REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true),
           "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str());
+  REQUIRE_SUCCESS(CtrlEdgeTransfer(), "Failed do ctrl edge transfer!");
   DumpGraph("_Before_DSP");
   auto status = PartitionImpl();
   GELOGD("%s.", DebugString().c_str());
@@ -86,6 +87,50 @@ Status DynamicShapePartitioner::Partition() {
   return status;
 }

+Status DynamicShapePartitioner::CtrlEdgeTransfer() {
+  GELOGD("Do ctrl edge transfer start!");
+  GE_CHECK_NOTNULL(root_graph_);
+
+  bool is_dynamic_shape = false;
+  (void)AttrUtils::GetBool(root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape);
+  if (!is_dynamic_shape) {
+    return SUCCESS;
+  }
+  for (auto &subgraph : root_graph_->GetAllSubgraphs()) {
+    for (ge::NodePtr &n : subgraph->GetDirectNode()) {
+      auto op_desc = n->GetOpDesc();
+      if (op_desc == nullptr) {
+        continue;
+      }
+      auto op_type = op_desc->GetType();
+      if (op_type == CONSTANT || op_type == CONSTANTOP) {
+        if (n->GetInAllNodes().empty()) {
+          GELOGD("[CtrlEdgeTransferPass] node [%s] in nodes is empty", n->GetName().c_str());
+          continue;
+        }
+
+        GELOGD("start to tranfer ctrl edge for const node [%s]", n->GetName().c_str());
+
+        for (auto &in_control_node : n->GetInControlNodes()) {
+          GE_CHECK_NOTNULL(in_control_node);
+          GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(),
+                            n->GetInControlAnchor()), "remove edge failed");
+          for (auto &out_node : n->GetOutNodes()) {
+            if (out_node == nullptr) {
+              continue;
+            }
+            GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(),
+                              out_node->GetInControlAnchor()), "add edge failed.");
+          }
+        }
+      }
+    }
+  }
+
+  GELOGD("Do ctrl edge transfer end!");
+  return SUCCESS;
+}
+
 Status DynamicShapePartitioner::PartitionImpl() {
   REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed.");
   REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes.");
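
CtrlEdgeTransfer rewires control dependencies around constants in the partitioned subgraphs: each incoming control edge on a CONSTANT/CONSTANTOP node is removed and re-attached to every out-node of the constant, so the ordering constraint survives while the constant itself no longer carries it. Schematically:

    // before:  Pred ──ctrl──▶ Const ──data──▶ Consumer
    // after:   Pred ──ctrl──────────────────▶ Consumer   (Const keeps only its data edge)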

@@ -151,6 +151,7 @@ class DynamicShapePartitioner {
   Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow);
   Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow);
   bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor);
+  Status CtrlEdgeTransfer();

   ge::ComputeGraphPtr root_graph_;  // The original graph to partition
   std::unordered_map<NodePtr, std::shared_ptr<Cluster>> node_2_cluster_;  // Record nodes and the cluster it belongs to
   // topological sorted clusters, this field will change with the splitting.

@@ -37,10 +37,6 @@
 #include "graph/utils/type_utils.h"

 namespace ge {
-namespace {
-const uint32_t kShapeDimSize = 1;
-const uint32_t DIM_SIZE_TWO = 2;
-}  // namespace

 Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector<int64_t> &data,
                                               std::vector<GeTensorPtr> &v_output, const bool scalar_output) {

@@ -63,7 +63,7 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No
   GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return "");
   if (node->GetType() == CAST) {
     trans_data_type = true;
-  } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED) {
+  } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) {
     trans_format = true;
     trans_shape = true;
   } else if (node->GetType() == TRANSDATA) {

@@ -185,7 +185,7 @@ set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES
 )

 ############ libhost_cpu_opskernel_builder.a ############
-add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST})
+add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})
 target_compile_options(host_cpu_opskernel_builder_static PRIVATE
     -Werror

@@ -112,8 +112,8 @@ void FloorDivKernel::ShapeCal(const std::vector<ge::ConstGeTensorPtr> &input, Ge
 template <typename T>
 T FloorDivKernel::DivCal(const T &x_i, const T &y_i) {
   if ((x_i < static_cast<T>(0)) != (y_i < static_cast<T>(0))) {
-    T abs_x_i = std::abs(x_i);
-    T abs_y_i = std::abs(y_i);
+    T abs_x_i = x_i < 0 ? -x_i : x_i;
+    T abs_y_i = y_i < 0 ? -y_i : y_i;
     return static_cast<T>(static_cast<int32_t>(-(abs_x_i + abs_y_i - 1) / abs_y_i));
   } else {
     return static_cast<T>(static_cast<int32_t>(x_i / y_i));
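
Two things are worth noting in DivCal. First, std::abs is replaced by a ternary, presumably because std::abs is not well-defined for every T this template is instantiated with (e.g. unsigned or half-precision types). Second, recall why the mixed-sign branch exists at all: integer division in C++ truncates toward zero, while floor division must round toward negative infinity, and -((|x| + |y| - 1) / |y|) realizes exactly that rounding. A self-contained check of the same formula (plain int version, for illustration only):

    #include <cassert>

    int FloorDiv(int x, int y) {
      if ((x < 0) != (y < 0)) {        // signs differ: truncation would round up
        int ax = x < 0 ? -x : x;
        int ay = y < 0 ? -y : y;
        return -((ax + ay - 1) / ay);  // round away from zero, i.e. toward -inf
      }
      return x / y;                    // same sign: truncation already floors
    }

    int main() {
      assert(FloorDiv(-7, 2) == -4);   // plain -7 / 2 would give -3
      assert(FloorDiv(7, -2) == -4);
      assert(FloorDiv(7, 2) == 3);
      return 0;
    }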

@@ -40,10 +40,6 @@ class FloorDivKernel : public Kernel {
   template <typename T>
   Status DataCal(const std::vector<ConstGeTensorPtr> &input, ge::GeTensorPtr output_ptr);
   Status ComputeByDataType(DataType data_type, const std::vector<ConstGeTensorPtr> &input, GeTensorPtr output_ptr);
-
-  int64_t axis_dim_;
-  int64_t head_dim_;
-  int64_t end_dim_;
 };
 }  // namespace ge
} // namespace ge } // namespace ge

@@ -187,7 +187,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin
     return PARAM_INVALID;
   }

-  uint tmp_value = aspect_ratios_size * min_sizes_size;
+  uint32_t tmp_value = aspect_ratios_size * min_sizes_size;
   if (ge::CheckUint32AddOverflow(tmp_value, max_sizes_size) != SUCCESS) {
     GELOGW("Failed to get list param.");
     return PARAM_INVALID;
@@ -288,7 +288,7 @@ std::unique_ptr<float[]> SsdPriorboxKernel::BoundaryCalulate(int dim_size, int l
     }
   }

-  return std::move(output_data);
+  return output_data;
 }

 Status SsdPriorboxKernel::Compute(const NodePtr &node, std::vector<GeTensorPtr> &v_output) {
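
Dropping std::move on the return is the right call here: returning a local by name already moves (or elides the copy entirely via NRVO), and an explicit std::move actively disables copy elision; compilers flag it with -Wpessimizing-move / -Wredundant-move. In miniature:

    #include <memory>

    std::unique_ptr<float[]> MakeBuffer(int n) {
      std::unique_ptr<float[]> data(new float[n]());
      // ... fill data ...
      return data;  // implicit move; 'return std::move(data)' would inhibit NRVO
    }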

@@ -77,7 +77,7 @@ do { \
   RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name, ##__VA_ARGS__)
 #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) \
-  RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name, ##__VA_ARGS__)
+  RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACKS, fmt, "Callback", name, ##__VA_ARGS__)
 }  // namespace hybrid
 }  // namespace ge
 #endif  // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_

@@ -353,6 +353,44 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
   return SUCCESS;
 }

+Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
+                                         const std::vector<GeTensorDesc> &input_desc,
+                                         std::vector<DataBuffer> &outputs,
+                                         std::vector<GeTensorDesc> &output_desc) {
+  GELOGI("Start to execute model.");
+  HybridModelExecutor::ExecuteArgs args;
+  args.inputs.resize(inputs.size());
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    TensorValue tensor_value(inputs[i].data, inputs[i].length);
+    args.inputs[i] = tensor_value;
+  }
+  GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
+  for (const auto &output_tensor_desc : args.output_desc) {
+    output_desc.emplace_back(*output_tensor_desc);
+  }
+
+  for (size_t i = 0; i < args.outputs.size(); ++i) {
+    int64_t output_real_size = 0;
+    ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[i], output_real_size);
+    if (graph_status != GRAPH_SUCCESS) {
+      GELOGE(FAILED, "Get tensor size in bytes failed.");
+      return FAILED;
+    }
+    if (output_real_size > 0) {
+      if (outputs[i].length < static_cast<uint64_t>(output_real_size)) {
+        GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]",
+               i, outputs[i].length, output_real_size);
+        return FAILED;
+      }
+      GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE));
+    }
+    outputs[i].length = output_real_size;
+  }
+
+  return SUCCESS;
+}
+
 Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
   GELOGD("Start to execute model.");
   // prepare inputs
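
The new Execute overload guards each device-to-device copy: the user's output buffer must be at least as large as the real (possibly dynamically inferred) tensor size, and the length field is rewritten afterward to report the actual size. The guard in miniature (illustrative names, not GE API):

    #include <cstdint>

    // Returns false instead of overflowing the caller's buffer.
    bool CheckOutputFits(uint64_t user_buffer_len, int64_t real_size) {
      return real_size <= 0 || user_buffer_len >= static_cast<uint64_t>(real_size);
    }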

Some files were not shown because too many files have changed in this diff.
