!712 update c76 code

Merge pull request !712 from 王涛/r1.2.0
pull/712/MERGE
计晨 4 years ago committed by Gitee
commit fe5db33358

@ -272,6 +272,7 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
std::vector<int64_t> dynamic_shape_dims = {kDynamicDimValue};
GeShape dynamic_shape(dynamic_shape_dims);
std::vector<std::pair<int64_t, int64_t>> dynamic_shape_range;
ge::GeTensor inputTensor;
ge::GeTensorDesc desc(input_desc);
@ -280,6 +281,7 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
if (!is_const && shape_ori.GetDims().size() > 0) {
desc.SetShape(dynamic_shape);
desc.SetShapeRange(dynamic_shape_range);
}
inputTensor.SetTensorDesc(desc);

File diff suppressed because it is too large Load Diff

@ -529,15 +529,6 @@ class DavinciModel {
struct timeInfo time_info_;
int32_t dataInputTid;
///
/// @ingroup ge
/// @brief Save Batch label Info.
/// @param [in] const OpDescPtr &op_desc
/// @param [in] uintptr_t addr: address value in args block.
/// @return None.
///
void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr);
///
/// @ingroup ge
/// @brief Copy Check input size and model op size.
@ -649,14 +640,6 @@ class DavinciModel {
///
void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index);
///
/// @ingroup ge
/// @brief input zero copy node Initialize.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitInputZeroCopyNodes(const NodePtr &node);
///
/// @ingroup ge
/// @brief NetOutput Op Initialize.
@ -665,30 +648,6 @@ class DavinciModel {
///
Status InitNetOutput(const NodePtr &node);
///
/// @ingroup ge
/// @brief output zero copy node Initialize.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitOutputZeroCopyNodes(const NodePtr &node);
///
/// @ingroup ge
/// @brief input zero copy node Initialize for Case.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitInputBatchLabel(const NodePtr &node);
///
/// @ingroup ge
/// @brief output zero copy node Initialize for Case.
/// @param [in] NodePtr: netoutput Op.
/// @return Status
///
Status InitOutputBatchLabel(const NodePtr &node);
///
/// @ingroup ge
/// @brief Constant Op Init.
@ -914,11 +873,6 @@ class DavinciModel {
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
std::set<const void *> copy_only_addrs_; // Address need copy to original place.
// {op_id, batch_label}
std::map<int64_t, std::string> zero_copy_op_id_batch_label_;
// {batch_label, addrs}
std::map<std::string, std::set<uintptr_t>> zero_copy_batch_label_addrs_;
std::vector<TaskInfoPtr> task_list_;
// rt_model_handle
rtModel_t rt_model_handle_;

@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
addr_count_ = out_count;
}
bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
bool set_batch_label_flag = false;
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
auto &addrs_mapping_list = GetOutsideAddrs();
auto args_addrs = addrs_mapping_list[out_count].find(outside_addr);
if (args_addrs != addrs_mapping_list[out_count].end()) {
auto args_addrs = outside_addrs_[out_count].find(outside_addr);
if (args_addrs != outside_addrs_[out_count].end()) {
GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid.");
void *args_val = static_cast<uint8_t *>(args) + offset;
args_addrs->second.push_back(args_val);
GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val,
args, offset);
set_batch_label_flag = true;
}
}
return set_batch_label_flag;
}
} // namespace ge

@ -51,7 +51,7 @@ class ZeroCopyOffset {
const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag);
void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
std::vector<void *> &tensor_addrs);
bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);
void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset);
// basic_addr of l2-fusion
void *GetBasicAddr() const { return basic_addr_; }

@ -22,8 +22,6 @@
#include "common/ge_compiler_options.h"
namespace ge {
const char *const kDefaultBatchLable = "Batch_default";
ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size)
: name_(name), args_addr_(args), args_size_(size), is_updated_(false) {}
@ -66,59 +64,18 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) {
const uint8_t *data = static_cast<const uint8_t *>(info);
args_info_.assign(data, data + size);
GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
args_addr_, args_size_, size);
}
/**
 * @ingroup ge
 * @brief Check whether an address may be updated for the given batch label.
 *        Used for dynamic batch / resolution scene.
 * @param [in] batch_addrs: dynamic batch addr info, {batch_label, addrs}.
 * @param [in] batch_label: batch label to look up in batch_addrs.
 * @param [in] addr: virtual address value from Op.
 * @return: false only when batch_label has a non-empty address set that does not contain addr
 *          and the default-label address set does not contain addr either; true otherwise.
 */
bool ZeroCopyTask::CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label,
                                     uintptr_t addr) {
  // Look the sets up in place; copying them out of the map is not needed for read-only queries.
  const auto dynamic_input_iter = batch_addrs.find(batch_label);
  if (dynamic_input_iter == batch_addrs.end() || dynamic_input_iter->second.empty()) {
    // No addresses recorded for this label: nothing restricts the address.
    return true;
  }
  if (dynamic_input_iter->second.count(addr) > 0) {
    return true;
  }

  // Not recorded under the requested label: accept only if it is recorded under the default label.
  // A missing or empty default set rejects the address.
  const auto fix_input_iter = batch_addrs.find(kDefaultBatchLable);
  return fix_input_iter != batch_addrs.end() && fix_input_iter->second.count(addr) > 0;
}
/**
* @ingroup ge
* @brief Set user data addr to Task param.
* @param [in] addr: virtual address value from Op.
* @param [in] buffer_addr: real_data_buffer_addr from user.
* @param [in] batch_addrs: dynamic batch addr info.
* @param [in] batch_label: batch label.
* @return: void
*/
Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs,
const string &batch_label) {
Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) {
auto iter = task_addr_offset_.find(addr);
if (iter != task_addr_offset_.end()) {
auto &cur_pair = *iter;

@ -550,7 +550,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph, compute_graph, session_id,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@ -565,7 +566,8 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
}
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this,
compute_graph->GetGraphID(), subgraph, compute_graph, session_id,
compute_graph->GetGraphID(), subgraph,
compute_graph->GetName(), session_id,
GetThreadLocalContext());
if (!f.valid()) {
GELOGE(FAILED, "Future is invalid");
@ -2471,7 +2473,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id,
const SubGraphInfoPtr &sub_graph_info_ptr,
const ComputeGraphPtr &compute_graph, uint64_t session_id,
const std::string &root_graph_name,
uint64_t session_id,
const GEThreadLocalContext &ge_context) {
if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) {
GetContext().SetSessionId(session_id);
@ -2488,9 +2491,13 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id);
return FAILED;
}
if (!AttrUtils::SetStr(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_NAME, root_graph_name)) {
GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_NAME for subgraph, \
root_graph_name: %s.", root_graph_name.c_str());
return FAILED;
}
compute_graph_tmp->SetSessionID(session_id);
Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp,
compute_graph,
engine_name);
if (ret != SUCCESS) {
GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str());

@ -219,7 +219,8 @@ class GraphManager {
static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id,
const SubGraphInfoPtr &sub_graph_info_ptr,
const ComputeGraphPtr &compute_graph, uint64_t session_id,
const std::string &root_graph_name,
uint64_t session_id,
const GEThreadLocalContext &ge_context);
Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor);
void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor);

@ -76,8 +76,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) {
}
}
Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph,
const std::string &engine_name) {
Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name) {
if (compute_graph == nullptr) {
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeSubGraph]: compute_graph is nullptr.");
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL;
@ -106,10 +105,6 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const Com
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) {
Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph));
if (ret != SUCCESS) {
auto root_graph = ge::GraphUtils::FindRootGraph(parent_graph);
if (root_graph != nullptr) {
ErrorManager::GetInstance().SaveMstuneCompileFailedMsg(root_graph->GetName());
}
GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphAfterGraphSlice]: graph optimize failed, ret:%d", ret);
return ret;
}

@ -42,8 +42,7 @@ class GraphOptimize {
~GraphOptimize() = default;
// subgraph optimize
Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph,
const std::string &engine_name);
Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name);
// original graph optimize
Status OptimizeOriginalGraph(ComputeGraphPtr &compute_graph);

@ -18,6 +18,7 @@
#include <map>
#include <set>
#include <string>
#include <utility>
#include "common/formats/format_transfers/format_transfer_fractal_nz.h"
#include "common/formats/format_transfers/format_transfer_fractal_z.h"
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
@ -27,9 +28,13 @@
#include "common/helper/model_helper.h"
#include "common/math/math_util.h"
#include "common/op/ge_op_utils.h"
#include "common/util/error_manager/error_manager.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "framework/common/debug/ge_log.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/common/local_context.h"
#include "graph/common/transop_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/shape_refiner.h"
#include "graph/manager/graph_var_manager.h"
@ -39,21 +44,29 @@
#include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h"
#include "graph/passes/assign_pass.h"
#include "graph/passes/base_pass.h"
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/cond_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/constant_folding_pass.h"
#include "graph/passes/constant_fuse_same_pass.h"
#include "graph/passes/control_trigger_pass.h"
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/dropout_pass.h"
#include "graph/passes/enter_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/for_pass.h"
#include "graph/passes/get_original_format_pass.h"
#include "graph/passes/guarantee_const_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/infershape_pass.h"
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/merge_pass.h"
#include "graph/passes/net_output_pass.h"
#include "graph/passes/next_iteration_pass.h"
#include "graph/passes/no_use_reshape_remove_pass.h"
#include "graph/passes/parallel_concat_start_op_pass.h"
#include "graph/passes/placeholder_with_default_pass.h"
@ -68,18 +81,45 @@
#include "graph/passes/shape_operate_op_remove_pass.h"
#include "graph/passes/snapshot_pass.h"
#include "graph/passes/stop_gradient_pass.h"
#include "graph/passes/subgraph_pass.h"
#include "graph/passes/switch_data_edges_bypass.h"
#include "graph/passes/switch_dead_branch_elimination.h"
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/merge_to_stream_merge_pass.h"
#include "graph/passes/switch_to_stream_switch_pass.h"
#include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/unused_const_pass.h"
#include "graph/passes/unused_op_remove_pass.h"
#include "graph/passes/var_is_initialized_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/preprocess/insert_op/util_insert_aipp_op.h"
#include "graph/types.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
#include "multi_batch_copy_graph.h"
#include "runtime/dev.h"
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/permute_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/same_transdata_breadth_fusion_pass.h"
#include "graph/passes/transop_breadth_fusion_pass.h"
#include "graph/passes/transop_depth_fusion_pass.h"
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h"
#include "graph/passes/cast_remove_pass.h"
#include "graph/passes/data_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/mark_agnostic_pass.h"
namespace ge {
namespace {
static std::map<std::string, ge::DataType> output_type_str_to_datatype = {

@ -22,6 +22,7 @@
#include "common/blocking_queue.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "graph/ge_local_context.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/common/tensor_value.h"
#include "hybrid/executor/hybrid_profiler.h"
@ -38,6 +39,7 @@ struct GraphExecutionContext {
uint64_t session_id = 0;
const HybridModel *model = nullptr;
const GEThreadLocalContext *ge_context = nullptr;
rtStream_t stream = nullptr;
rtContext_t rt_context = nullptr;
rtContext_t rt_gen_context = nullptr;

@ -95,6 +95,7 @@ Status HybridModelExecutor::InitExecutionContext() {
context_.stream = stream_;
context_.model = model_;
context_.session_id = ::ge::GetContext().SessionId();
context_.ge_context = &GetThreadLocalContext();
GELOGD("session id from model = %lu, from context = %lu", model_->GetSessionId(), context_.session_id);
context_.allocator = NpuMemoryAllocator::GetAllocator(device_id_);
GE_CHECK_NOTNULL(context_.allocator);

@ -26,6 +26,9 @@ Status TaskCompileEngine::Compile(NodeState &node_state, GraphExecutionContext *
RECORD_COMPILE_EVENT(context, node_item.NodeName().c_str(), "[Compile] Start");
GE_CHK_RT_RET(rtCtxSetCurrent(context->rt_gen_context));
if (context->ge_context != nullptr) {
GetThreadLocalContext() = *context->ge_context;
}
shared_ptr<NodeTask> kernel_task;
auto ret = node_item.node_executor->CompileTask(*context->model, node_item.node, kernel_task);
RECORD_COMPILE_EVENT(context, node_state.GetName().c_str(), "[Compile] End");

@ -39,7 +39,7 @@ size_t GetAlignedSize(size_t size) {
}
Status ProfilingTaskInfo(OpTask *op_task) {
if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) {
if (!ProfilingManager::Instance().ProfilingModelLoadOn()) {
return SUCCESS;
}

@ -119,11 +119,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam &param, bool keep_works
uintptr_t *arg_base = nullptr;
size_t arg_num = 0;
GetIoAddr(arg_base, arg_num);
if (arg_num != all_addresses.size()) {
GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect = %zu, but got = %zu",
if (arg_num < all_addresses.size()) {
GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu",
op_desc_->GetName().c_str(),
arg_num,
all_addresses.size());
all_addresses.size(),
arg_num);
return INTERNAL_ERROR;
}

@ -392,19 +392,9 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT,
OP_BANK_PATH};
// for interface: aclgrphParse
const std::set<std::string> ir_parser_suppported_options = {INPUT_FORMAT,
INPUT_SHAPE,
OP_NAME_MAP,
IS_DYNAMIC_INPUT,
INPUT_FP16_NODES,
IS_INPUT_ADJUST_HW_LAYOUT,
IS_OUTPUT_ADJUST_HW_LAYOUT,
OUTPUT,
OUTPUT_TYPE,
OUT_NODES,
COMPRESS_WEIGHT_CONF,
ENABLE_SCOPE_FUSION_PASSES,
LOG_LEVEL};
const std::set<std::string> ir_parser_suppported_options = {
INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT,
OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES};
// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {CORE_TYPE,

@ -1 +1 @@
Subproject commit 5a1b0ab95e2d205ee9ee578ac4bcde4f4fbed6d8
Subproject commit a71110f5e42dc768ddbbd51289eb467518dedf9b

@ -1 +1 @@
Subproject commit 77dc42c383e416ed4a0f606ddc3c02cdaa082ac3
Subproject commit 2e55b1168df38cd3c76412a8d00bc8b6e7f19f82
Loading…
Cancel
Save