!1300 Update GE commit id

From: @shenwei41
Reviewed-by: @xsmq,@liucunwei
Signed-off-by: @liucunwei
pull/1300/MERGE
mindspore-ci-bot committed 4 years ago via Gitee
commit d9d99c3cf5

.gitmodules (vendored): 4 changed lines

@ -1,8 +1,8 @@
[submodule "parser"]
path = parser
url = https://gitee.com/ascend/parser.git
branch = master
branch = r1.3.0
[submodule "metadef"]
path = metadef
url = https://gitee.com/ascend/metadef.git
branch = master
branch = r1.3.0

@ -937,6 +937,10 @@ add_library(atc_stub_ge_compiler SHARED
add_dependencies(atc_stub_ge_compiler ge_stub)
target_compile_options(atc_stub_ge_compiler PRIVATE
-fno-common
)
target_link_libraries(atc_stub_ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
)

@ -171,17 +171,17 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
// GE finalize, releasing all resources
Status GEFinalize() {
ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize);
GELOGT(TRACE_INIT, "GEFinalize start");
ErrorManager::GetInstance().GenWorkStreamIdDefault();
std::lock_guard<std::mutex> lock(g_ge_release_mutex);
// check init status
if (!g_ge_initialized) {
GELOGW("GEFinalize is called before GEInitialize");
GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize");
return SUCCESS;
}
std::lock_guard<std::mutex> lock(g_ge_release_mutex);
ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize);
ErrorManager::GetInstance().GenWorkStreamIdDefault();
GELOGT(TRACE_INIT, "GEFinalize start");
// call Finalize
Status ret = SUCCESS;
Status middle_ret;
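
Aside: the reordered body above checks g_ge_initialized before taking g_ge_release_mutex, so a stray GEFinalize returns early without contending on the lock. A minimal standalone sketch of that guard pattern (illustrative names, not the GE API):

#include <iostream>
#include <mutex>

// Minimal sketch of the reordered finalize guard (illustrative names).
namespace {
bool g_initialized = false;   // set by Initialize(), cleared by Finalize()
std::mutex g_release_mutex;   // serializes the release path
}

int Finalize() {
  // Fast path: bail out before taking the lock if Initialize() never ran.
  if (!g_initialized) {
    std::cerr << "Finalize called before Initialize\n";
    return 0;  // treated as success, as in GEFinalize
  }
  std::lock_guard<std::mutex> lock(g_release_mutex);
  // ... release resources here ...
  g_initialized = false;
  return 0;
}

int main() { Finalize(); }  // logs the warning, returns success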

@ -212,6 +212,7 @@ target_link_libraries(ge_executor PRIVATE
add_library(ge_executor_shared SHARED ${SRC_LIST} ${PROTO_HDRS})
target_compile_options(ge_executor_shared PRIVATE
-fno-common
-Werror
-O2
-Wno-deprecated-declarations

@ -38,6 +38,7 @@ REGISTER_OP_CREATOR(ExpandDims, GeDeletedOp);
REGISTER_OP_CREATOR(Reshape, GeDeletedOp);
REGISTER_OP_CREATOR(ReFormat, GeDeletedOp);
REGISTER_OP_CREATOR(Squeeze, GeDeletedOp);
REGISTER_OP_CREATOR(Unsqueeze, GeDeletedOp);
REGISTER_OP_CREATOR(Size, GeDeletedOp);
REGISTER_OP_CREATOR(Shape, GeDeletedOp);
REGISTER_OP_CREATOR(ShapeN, GeDeletedOp);
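
Aside: REGISTER_OP_CREATOR-style registration usually works by having the macro define a static registrar whose constructor inserts a factory into a global map before main runs. A hedged sketch of that mechanism (illustrative types; the real GE macro differs in detail):

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

// A static registrar inserts a factory into a global map at program start.
struct Op { virtual ~Op() = default; };
struct GeDeletedOp : Op {};

using OpCreator = std::function<std::unique_ptr<Op>()>;

std::map<std::string, OpCreator> &OpRegistry() {
  static std::map<std::string, OpCreator> registry;  // constructed on first use
  return registry;
}

struct OpRegistrar {
  OpRegistrar(const std::string &type, OpCreator creator) {
    OpRegistry()[type] = std::move(creator);
  }
};

#define REGISTER_OP_CREATOR(type, clazz) \
  static OpRegistrar g_registrar_##type(#type, [] { return std::unique_ptr<Op>(new clazz()); })

REGISTER_OP_CREATOR(Unsqueeze, GeDeletedOp);  // same shape as the hunk above

int main() {
  std::cout << "registered ops: " << OpRegistry().size() << "\n";  // 1
}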

@ -16,14 +16,12 @@
#include "ge_runtime/task/label_goto_task.h"
#include "ge_runtime/task/task_factory.h"
#include "framework/common/util.h"
namespace ge {
namespace model_runner {
LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info)
: TaskRepeater<LabelGotoTaskInfo>(model_context, task_info),
task_info_(task_info),
stream_(nullptr),
label_(nullptr) {
: TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) {
if (task_info_ == nullptr) {
GELOGW("task_info_ is null!");
return;
@ -42,29 +40,78 @@ LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::share
label_ = label_list[label_id];
}
LabelGotoTask::~LabelGotoTask() {}
LabelGotoTask::~LabelGotoTask() {
GE_FREE_RT_LOG(label_info_);
GE_FREE_RT_LOG(index_value_);
}
bool LabelGotoTask::Distribute() {
GELOGI("LabelGotoTask Distribute start.");
if (!CheckParamValid()) {
return false;
}
const std::vector<void *> label_list = { label_ };
rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
uint64_t branch_index = 0;
rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size();
rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
rt_ret = rtLabelSwitchByIndex(index_value_, label_list.size(), label_info_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
GELOGI("DistributeTask end.");
return true;
}
bool LabelGotoTask::CheckParamValid() {
if (stream_ == nullptr) {
GELOGE(PARAM_INVALID, "stream is null!");
return false;
}
if (label_ == nullptr) {
GELOGE(PARAM_INVALID, "label is null!");
return false;
}
rtError_t rt_ret = rtLabelGotoEx(label_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
if (label_info_ != nullptr) {
GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
return false;
}
if (index_value_ != nullptr) {
GELOGE(PARAM_INVALID, "index_value_ has dirty data.");
return false;
}
GELOGI("DistributeTask end.");
return true;
}
REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo);
} // namespace model_runner
} // namespace ge
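
Aside: the rewritten Distribute builds an unconditional goto out of the switch-by-index primitive: device memory holds a branch index pinned to 0 and a one-entry label table, so the switch always jumps to the single label. A standalone sketch with stubbed rt* stand-ins (simplified signatures, not the real runtime):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <new>
#include <vector>

// Stub stand-ins for the Ascend runtime so the sketch compiles alone.
using rtError_t = int;
constexpr rtError_t RT_ERROR_NONE = 0;
struct rtLabelDevInfo { void *label; };

rtError_t rtMalloc(void **ptr, std::size_t size) { *ptr = ::operator new(size); return RT_ERROR_NONE; }
rtError_t rtMemcpy(void *dst, std::size_t, const void *src, std::size_t n) {
  std::memcpy(dst, src, n);
  return RT_ERROR_NONE;
}
rtError_t rtLabelListCpy(void *const *labels, std::size_t n, void *dst, std::size_t) {
  auto *info = static_cast<rtLabelDevInfo *>(dst);
  for (std::size_t i = 0; i < n; ++i) info[i].label = labels[i];  // device copy in reality
  return RT_ERROR_NONE;
}
rtError_t rtLabelSwitchByIndex(void *index, std::size_t branch_num, void * /*table*/, void * /*stream*/) {
  std::cout << "jump to branch " << *static_cast<uint64_t *>(index) << " of " << branch_num << "\n";
  return RT_ERROR_NONE;
}

// An unconditional goto: one-entry label table plus an index pinned to 0,
// so the switch always takes the single branch (frees omitted for brevity).
bool DistributeGoto(void *label, void *stream) {
  const std::vector<void *> label_list = {label};
  void *index_value = nullptr;
  if (rtMalloc(&index_value, sizeof(uint64_t)) != RT_ERROR_NONE) return false;
  uint64_t branch_index = 0;
  rtMemcpy(index_value, sizeof(uint64_t), &branch_index, sizeof(uint64_t));
  std::size_t info_size = sizeof(rtLabelDevInfo) * label_list.size();
  void *label_info = nullptr;
  if (rtMalloc(&label_info, info_size) != RT_ERROR_NONE) return false;
  rtLabelListCpy(label_list.data(), label_list.size(), label_info, info_size);
  return rtLabelSwitchByIndex(index_value, label_list.size(), label_info, stream) == RT_ERROR_NONE;
}

int main() {
  int dummy_label = 0;
  DistributeGoto(&dummy_label, nullptr);  // prints: jump to branch 0 of 1
}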

@ -31,9 +31,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> {
bool Distribute() override;
private:
bool CheckParamValid();
std::shared_ptr<LabelGotoTaskInfo> task_info_;
void *stream_;
void *label_;
void *stream_{nullptr};
void *label_{nullptr};
void *label_info_{nullptr};
void *index_value_{nullptr};
};
} // namespace model_runner
} // namespace ge
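
Aside: the header now relies on in-class default member initializers instead of a long constructor initializer list; a small illustration of why that keeps additional constructors cheap (illustrative class, not the GE type):

// In-class defaults: every constructor that does not mention a field
// still leaves it initialized.
class LabelTask {
 public:
  LabelTask() = default;                                 // all four start as nullptr
  explicit LabelTask(void *stream) : stream_(stream) {}  // label_ etc. still nullptr

 private:
  void *stream_{nullptr};
  void *label_{nullptr};
  void *label_info_{nullptr};
  void *index_value_{nullptr};
};

int main() {
  LabelTask a;            // compiles without a hand-written default ctor
  LabelTask b(nullptr);
  (void)a; (void)b;
}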

@ -50,9 +50,13 @@ const char *const kFileNameSuffix = "online";
const char *const kAicpuAllshape = "_AllShape";
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
const int64_t kDynamicDimValue = -2;
const int kDefaultDeviceId = 0;
const int kDefaultJobId = 0;
std::map<ge::OpEngineType, std::string> engine_type_map{
{ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}};
{ge::ENGINE_SYS, kEngineNameDefault},
{ge::ENGINE_AICORE, kAIcoreEngine},
{ge::ENGINE_VECTOR, kVectorEngine}};
bool ContainsDynamicInpus(const ge::OpDesc &op_desc) {
for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) {
@ -83,8 +87,9 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty
} else {
ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"},
{op_desc->GetName(), op_desc->GetType(), "engine type",
"it only support kEngineNameDefault/kAIcoreEngine/kVectorEngine"});
GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type));
"it only support default/AIcoreEngine/VectorEngine"});
GELOGE(FAILED, "[Check][EngineType]value:%d not support, "
"only support default/AIcoreEngine/VectorEngine now", static_cast<int>(engine_type));
return FAILED;
}
@ -188,17 +193,20 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
(void)AttrUtils::SetBool(data_op, "_is_single_op", true);
GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail.");
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail.");
GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][InputDesc]fail for node:%s", data_op->GetName().c_str());
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str());
if (attr) {
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, "Set index fail.");
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED,
"[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str());
}
ge::NodePtr arg_node = graph->AddNode(data_op);
GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail.");
GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail");
GE_CHK_STATUS(GraphUtils::AddEdge(arg_node->GetOutDataAnchor(0), node->GetInDataAnchor(index)),
"Add edge[%s->%s] fail.", data_op->GetName().c_str(), node->GetName().c_str());
"[Add][Edge]fail from node:%s to node:%s", data_op->GetName().c_str(), node->GetName().c_str());
return SUCCESS;
}
@ -213,20 +221,23 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons
for (const auto &out_desc : outputs) {
GeTensorDesc tensor = out_desc.GetTensorDesc();
TensorUtils::SetInputTensor(tensor, true);
GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail");
GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][InputDesc]fail for node:%s", op_desc->GetName().c_str());
TensorUtils::SetInputTensor(tensor, false);
TensorUtils::SetOutputTensor(tensor, true);
GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail");
GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][OutputDesc]fail for node:%s", op_desc->GetName().c_str());
count++;
}
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
ge::NodePtr out_node = graph->AddNode(op_desc);
GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, "Insert Output node fail.");
GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED,
"[Add][Node:%s]fail in graph:%u", op_desc->GetName().c_str(), graph->GetGraphID());
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
for (int32_t i = 0; i < count; ++i) {
GE_CHK_STATUS(GraphUtils::AddEdge(node->GetOutDataAnchor(i), out_node->GetInDataAnchor(i)),
"Add edge[%s->%s] fail.", node->GetName().c_str(), out_node->GetName().c_str());
"[Add][Edge]fail from node:%s to node:%s", node->GetName().c_str(), out_node->GetName().c_str());
}
return SUCCESS;
@ -710,7 +721,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
auto node = comp_graph->FindNode(op_desc->GetName());
Status ret = CheckEngineTypeSupport(node, engine_type);
if (ret != SUCCESS) {
GELOGE(ret, "check engine type failed.");
GELOGE(ret, "[Check][EngineType]value:%d for node:%s not supported", engine_type, node->GetName().c_str());
return ret;
}
}
@ -915,6 +926,13 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor>
static std::atomic<uint64_t> atomic_session_id(0);
auto session_id = atomic_session_id.fetch_add(1);
// This is a temporary add for graph with variable
auto version = static_cast<int32_t>(SessionVersion::ClOUD_VERSION);
GELOGI("Start init var instance, session_id %lu", session_id);
ret = VarManager::Instance(session_id)->Init(version, session_id, kDefaultDeviceId, kDefaultJobId);
if (ret != SUCCESS) {
GELOGW("Failed to init var instance, session_id %lu", session_id);
}
if (is_singleop_unregistered_) {
ret = graph_manager_.BuildGraphForUnregisteredOp(graph_id, inputs, ge_root_model, session_id);
} else {
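
Aside: session ids here come from a single fetch_add on a function-local atomic, which yields unique, monotonically increasing ids without a lock. The same pattern in isolation:

#include <atomic>
#include <cstdint>
#include <iostream>

// One fetch_add on a function-local atomic: unique, monotonically
// increasing session ids with no lock.
uint64_t NextSessionId() {
  static std::atomic<uint64_t> atomic_session_id(0);
  return atomic_session_id.fetch_add(1);  // returns the pre-increment value
}

int main() {
  std::cout << NextSessionId() << " " << NextSessionId() << "\n";  // 0 1
}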

@ -400,6 +400,10 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor
}
static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
if (graph->GetGraphUnknownFlag()) {
GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str());
return SUCCESS;
}
for (auto &node : graph->GetDirectNode()) {
// CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
auto op_desc = node->GetOpDesc();

@ -33,13 +33,21 @@ using std::queue;
namespace ge {
LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {}
const string &LogicalStreamPass::GetName() const { return name_; }
const string &LogicalStreamPass::GetName() const {
return name_;
}
bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const { return subgraph.engine_conf.skip_assign_stream; }
bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const {
return subgraph.engine_conf.skip_assign_stream;
}
bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const { return subgraph.engine_conf.attach; }
bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const {
return subgraph.engine_conf.attach;
}
bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const { return subgraph.engine_conf.independent; }
bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const {
return subgraph.engine_conf.independent;
}
bool LogicalStreamPass::HasStreamLabel(const Subgraph &subgraph) const {
return !subgraph.subgraph_info.GetStreamLabel().empty();
@ -60,14 +68,14 @@ Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &
// Subgraphs of the same stream_label are assigned to the same stream,
// and different stream_labels are assigned new streams.
auto iter = label_streams.find(stream_label);
if (iter != label_streams.end()) {
subgraph->stream_id = iter->second;
} else {
if (iter == label_streams.end()) {
subgraph->stream_id = next_stream;
GELOGI("Assign new stream %ld for label %s.", next_stream, stream_label.c_str());
label_streams.emplace(stream_label, next_stream);
++next_stream;
next_stream++;
} else {
subgraph->stream_id = iter->second;
}
changed = true;
}
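
Aside: both label passes share the same allocation idiom: look the label up in a map, hand out a fresh stream id on a miss, and reuse the stored id on a hit. Extracted into a self-contained helper:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Miss: allocate a fresh stream id and remember it. Hit: reuse the id
// assigned to the same label earlier.
int64_t StreamForLabel(const std::string &label,
                       std::map<std::string, int64_t> &label_streams,
                       int64_t &next_stream) {
  auto iter = label_streams.find(label);
  if (iter == label_streams.end()) {
    int64_t assigned = next_stream;
    label_streams.emplace(label, assigned);
    next_stream++;
    return assigned;
  }
  return iter->second;
}

int main() {
  std::map<std::string, int64_t> label_streams;
  int64_t next_stream = 0;
  std::cout << StreamForLabel("loop", label_streams, next_stream)    // 0
            << StreamForLabel("branch", label_streams, next_stream)  // 1
            << StreamForLabel("loop", label_streams, next_stream)    // 0 again
            << "\n";
}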
@ -92,15 +100,15 @@ Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt
const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
auto &label_streams = engine_streams[engine];
auto iter = label_streams.find(stream_label);
if (iter != label_streams.end()) {
subgraph->stream_id = iter->second;
} else {
if (iter == label_streams.end()) {
subgraph->stream_id = next_stream;
GELOGI("Assign new independent stream %ld for engine %s (label: %s).", next_stream, engine.c_str(),
stream_label.c_str());
label_streams.emplace(stream_label, next_stream);
++next_stream;
next_stream++;
} else {
subgraph->stream_id = iter->second;
}
changed = true;
}
@ -121,7 +129,9 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP
}
SubgraphPtr reusable_subgraph = GetReusableSubgraph(subgraph, end_subgraph_map, pld_subgraph_map);
if (reusable_subgraph != nullptr) {
if (reusable_subgraph == nullptr) {
(void)AssignNewStream(subgraph);
} else {
if (HasAssignedStream(*reusable_subgraph)) {
subgraph->stream_id = reusable_subgraph->stream_id;
} else {
@ -140,8 +150,6 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP
GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", subgraph->name.c_str(),
subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(),
reusable_subgraph->engine_conf.id.c_str());
} else {
(void)AssignNewStream(subgraph);
}
changed = true;
}
@ -191,13 +199,15 @@ bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const Subgr
auto iter = pld_subgraph_map.find(end_pld_pair.second);
if (iter != pld_subgraph_map.end()) {
const SubgraphPtr &pred_subgraph_succ = iter->second;
if (pred_subgraph_succ != subgraph && pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id) {
if ((pred_subgraph_succ != subgraph) &&
(pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id)) {
return false;
}
}
}
if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) || IsEngineAttach(*subgraph)) {
if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) ||
IsEngineAttach(*subgraph)) {
return true;
}
@ -406,7 +416,7 @@ Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vector<Subgr
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
auto stream_id = op_desc->GetStreamId();
if (stream_id != kInvalidStream && !HasStreamLabel(*subgraph)) {
if ((stream_id != kInvalidStream) && !HasStreamLabel(*subgraph)) {
ops_without_label.emplace(op_desc);
}
}
@ -463,7 +473,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt
for (const NodePtr &node : graph->GetDirectNode()) {
if (!IsHcomNode(node->GetType()) ||
node->GetInDataNodes().size() <= 1) {
(node->GetInDataNodes().size() <= 1)) {
continue;
}
@ -575,7 +585,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap
GE_CHECK_NOTNULL(graph);
NodePtr parent_node = graph->GetParentNode();
if (parent_node == nullptr || parent_node->GetOpDesc() == nullptr) {
if ((parent_node == nullptr) || (parent_node->GetOpDesc() == nullptr)) {
context_.default_stream = kInvalidStream;
} else {
context_.default_stream = parent_node->GetOpDesc()->GetStreamId();
@ -597,7 +607,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap
return status;
}
GELOGD("Subgraphs of graph %s:", graph->GetName().c_str());
GELOGD("Subgraphs of graph %s", graph->GetName().c_str());
for (const auto &subgraph : subgraphs) {
if (subgraph != nullptr) {
GELOGD("subgraph: %s", subgraph->name.c_str());
@ -686,7 +696,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
auto op_desc = node->GetOpDesc();
if (op_desc != nullptr) {
int64_t stream_id = op_desc->GetStreamId();
if (stream_id != kInvalidStream && stream_id < stream_num) {
if ((stream_id != kInvalidStream) && (stream_id < stream_num)) {
stream_has_node[stream_id] = true;
}
}
@ -695,10 +705,10 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
context_.next_stream = 0;
vector<int64_t> old_to_new_streams(stream_num, kInvalidStream);
for (size_t old_stream = 0; old_stream < stream_has_node.size(); ++old_stream) {
for (size_t old_stream = 0; old_stream < stream_has_node.size(); old_stream++) {
if (stream_has_node[old_stream]) {
old_to_new_streams[old_stream] = context_.next_stream;
++context_.next_stream;
context_.next_stream++;
}
}
@ -706,7 +716,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
auto op_desc = node->GetOpDesc();
if (op_desc != nullptr) {
int64_t stream_id = op_desc->GetStreamId();
if (stream_id != kInvalidStream && stream_id < stream_num) {
if ((stream_id != kInvalidStream) && (stream_id < stream_num)) {
op_desc->SetStreamId(old_to_new_streams[stream_id]);
}
}

@ -70,7 +70,10 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
return SUCCESS;
}
if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) {
GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front());
GELOGE(FAILED, "[Check][MemRangeStep]first mem_range_step:%ld less than 0,invalid,"
"maybe has dynamic shape in graph", all_memory_size.front());
REPORT_INNER_ERROR("E19999", "first mem_range_step:%ld less than 0,invalid,"
"maybe has dynamic shape in graph", all_memory_size.front());
return FAILED;
}
// Memory size is 512 aligned, so it is not necessary to take less than 512
@ -81,12 +84,18 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
GELOGD("Range number: %zu", range_number);
vector<vector<int64_t>> ranges(range_number);
GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0.");
GE_CHK_BOOL_EXEC((range_number != 0),
REPORT_INNER_ERROR("E19999", "inner data [range_number] is 0, which is invalid");
return PARAM_INVALID,
"[Check][RangeNumber]inner data is 0, which is invalid.");
size_t range_number_limit = all_memory_size.size() / range_number;
int64_t range_ceil = min_memory_size;
for (size_t i = 1; i <= range_number; i++) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval),
GELOGE(FAILED, "Multiply result is out of range.");
GELOGE(FAILED, "[Check][MemRangeCeil]Multiply result is out of range,"
"range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval);
REPORT_INNER_ERROR("E19999", "process mem_range_ceil,multiply result out of range,"
"range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval);
return FAILED);
range_ceil *= kRangeCeilInterval; // The block size of each interval is doubled every time.
for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
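
Aside: the range table grows by repeated multiplication (range_ceil *= kRangeCeilInterval), so each step is overflow-checked first. A standalone version of that doubling-with-guard loop (constants below are assumptions for the sketch):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Each step multiplies the ceiling by the interval, so the product is
// overflow-checked first, mirroring the CheckUint64MulOverflow guard.
bool MulOverflows(uint64_t a, uint64_t b) {
  return b != 0 && a > UINT64_MAX / b;
}

std::vector<int64_t> BuildRangeCeils(int64_t min_size, uint32_t interval, std::size_t steps) {
  std::vector<int64_t> ceils;
  int64_t range_ceil = min_size;
  for (std::size_t i = 0; i < steps; ++i) {
    if (MulOverflows(static_cast<uint64_t>(range_ceil), interval)) break;
    range_ceil *= interval;  // block size doubles per interval when interval == 2
    ceils.push_back(range_ceil);
  }
  return ceils;
}

int main() {
  for (int64_t c : BuildRangeCeils(512, 2, 5)) std::cout << c << " ";  // 1024 2048 4096 8192 16384
  std::cout << "\n";
}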

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -385,7 +385,7 @@ Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_inf
Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
if (output_descs.size() != output_addrs.size()) {
GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
inner_dump_info.op->GetName().c_str(), output_descs.size());
@ -436,7 +436,7 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
// else data, const or variable op
aicpu::dump::Output output;
auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
if (output_tensor == nullptr) {
GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index,
inner_dump_info.op->GetOutputsSize());
@ -540,7 +540,7 @@ Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info
Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
GELOGI("Start dump input");
const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op);
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(*runtime_param_, inner_dump_info.op);
if (input_descs.size() != input_addrs.size()) {
GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(),
inner_dump_info.op->GetName().c_str(), input_descs.size());

@ -36,9 +36,21 @@
namespace ge {
class DataDumper {
public:
DataDumper() : runtime_param_{} {}
explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {}
explicit DataDumper(RuntimeParam *rsh)
: model_name_(),
model_id_(0),
runtime_param_(rsh),
dev_mem_load_(nullptr),
dev_mem_unload_(nullptr),
op_list_(),
input_map_(),
load_flag_(false),
device_id_(0),
global_step_(0),
loop_per_iter_(0),
loop_cond_(0),
compute_graph_(nullptr),
ref_info_() {}
~DataDumper();
@ -93,10 +105,10 @@ class DataDumper {
// for inference data dump
std::string om_name_;
uint32_t model_id_ = 0;
const RuntimeParam &runtime_param_;
void *dev_mem_load_ = nullptr;
void *dev_mem_unload_ = nullptr;
uint32_t model_id_;
RuntimeParam *runtime_param_;
void *dev_mem_load_;
void *dev_mem_unload_;
struct InnerDumpInfo;
struct InnerInputMapping;
@ -107,12 +119,12 @@ class DataDumper {
uint32_t end_graph_stream_id_ = 0;
bool is_end_graph_ = false;
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init
bool load_flag_ = false;
uint32_t device_id_ = 0;
uintptr_t global_step_ = 0;
uintptr_t loop_per_iter_ = 0;
uintptr_t loop_cond_ = 0;
ComputeGraphPtr compute_graph_ = nullptr; // release after DavinciModel::Init
bool load_flag_;
uint32_t device_id_;
uintptr_t global_step_;
uintptr_t loop_per_iter_;
uintptr_t loop_cond_;
ComputeGraphPtr compute_graph_; // release after DavinciModel::Init
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init
void *l1_fusion_addr_ = nullptr;
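
Aside: switching runtime_param_ from a const reference to a pointer is what allows DataDumper to drop the reference-binding constructor requirement and lets DavinciModel pass &runtime_param_ at the call site. A compact illustration of the difference (illustrative types, not GE's):

// A reference member pins the class: it must be bound in every
// constructor and blocks copy assignment. A pointer member allows a
// default constructor and reseating.
struct RuntimeParam { int graph_id = 0; };

struct RefHolder {
  explicit RefHolder(const RuntimeParam &p) : param_(p) {}
  const RuntimeParam &param_;  // no default ctor, no copy assignment
};

struct PtrHolder {
  PtrHolder() = default;       // fine: pointer defaults to nullptr
  explicit PtrHolder(RuntimeParam *p) : param_(p) {}
  RuntimeParam *param_{nullptr};
};

int main() {
  RuntimeParam rp;
  PtrHolder a;
  PtrHolder b(&rp);
  a = b;             // ok with a pointer member
  // RefHolder r;    // would not compile: the reference must be bound
  (void)a;
}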

@ -31,6 +31,7 @@
#include "common/scope_guard.h"
#include "common/thread_pool.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/compute_graph.h"
#include "graph/debug/ge_attr_define.h"
@ -184,7 +185,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
last_execute_mode_(INITIALIZATION),
session_id_(0),
device_id_(0),
maxDumpOpNum_(0), data_dumper_(runtime_param_),
maxDumpOpNum_(0), data_dumper_(&runtime_param_),
iterator_count_(0),
is_l1_fusion_enable_(false),
is_first_execute_(true) {
@ -297,6 +298,11 @@ void DavinciModel::ReleaseTask() {
GE_CHK_STATUS(task->Release(), "Release task failed.");
}
}
for (auto &item : label_goto_args_) {
GE_FREE_RT_LOG(item.second.first);
}
label_goto_args_.clear();
}
Status DavinciModel::Assign(const GeModelPtr &ge_model) {
@ -654,12 +660,12 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
runtime_param_.graph_id = compute_graph->GetGraphID();
// op debug register
GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed.");
GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed");
GE_TIMESTAMP_START(TransAllVarData);
GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed.");
GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed");
GE_TIMESTAMP_END(TransAllVarData, "GraphLoader::TransAllVarData");
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed.");
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed");
GE_TIMESTAMP_START(InitModelMem);
GELOGD("Known node is %d.", known_node_);
@ -667,7 +673,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
if (!known_node_) {
GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size));
data_inputer_ = new (std::nothrow) DataInputer();
GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr");
}
fixed_mem_base_ = reinterpret_cast<uintptr_t>(mem_base_);
GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem");
@ -1334,6 +1340,39 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
}
}
Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type, void *&arg_addr, uint32_t &arg_size) {
std::lock_guard<std::mutex> lock(label_args_mutex_);
auto it = label_goto_args_.find(label_index);
if (it != label_goto_args_.end()) {
arg_addr = it->second.first;
arg_size = it->second.second;
return SUCCESS;
}
if (label_index >= label_list_.size()) {
GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size());
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL(label_list_[label_index]);
vector<rtLabel_t> label_used = { label_list_[label_index] };
arg_size = label_used.size() * sizeof(rtLabelDevInfo);
rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
label_goto_args_[label_index] = { arg_addr, arg_size };
rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return SUCCESS;
}
/// @ingroup ge
/// @brief LabelSet Op Initialize.
/// @param [in] op_desc: LabelSet Op descriptor.
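
Aside: GetLabelGotoAddr is a mutex-guarded memoization: hit the per-label cache under the lock, and only allocate and populate the device buffer on a miss, so repeated gotos to the same label reuse one allocation. The shape of that cache, with a plain allocation standing in for rtMalloc/rtLabelListCpy:

#include <cstdint>
#include <iostream>
#include <map>
#include <mutex>
#include <new>
#include <utility>

// Look up under the lock, allocate and record only on a miss
// (frees omitted for brevity).
class LabelArgCache {
 public:
  std::pair<void *, uint32_t> Get(uint32_t label_index) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto it = cache_.find(label_index);
    if (it != cache_.end()) {
      return it->second;  // hit: reuse the earlier allocation
    }
    uint32_t size = sizeof(uint64_t);   // stand-in for the label-table size
    void *addr = ::operator new(size);  // stand-in for rtMalloc
    cache_[label_index] = {addr, size};
    return {addr, size};
  }

 private:
  std::mutex mutex_;
  std::map<uint32_t, std::pair<void *, uint32_t>> cache_;
};

int main() {
  LabelArgCache cache;
  auto first = cache.Get(3);
  auto second = cache.Get(3);
  std::cout << (first.first == second.first) << "\n";  // 1: same buffer
}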

@ -273,6 +273,8 @@ class DavinciModel {
const vector<rtLabel_t> &GetLabelList() const { return label_list_; }
Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size);
Status DestroyThread();
// get Op
@ -930,6 +932,9 @@ class DavinciModel {
vector<rtLabel_t> label_list_;
set<uint32_t> label_id_indication_;
mutex label_args_mutex_;
map<uint32_t, pair<void *, uint32_t>> label_goto_args_;
mutex outside_addrs_mutex_;
vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
set<const void *> copy_only_addrs_; // Address need copy to original place.

@ -297,12 +297,11 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
if (model_id == INVALID_MODEL_ID) {
GenModelId(&model_id);
}
bool is_shape_unknown = false;
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
string model_name = "";
GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u",
model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
bool is_shape_unknown = ge_root_model->GetRootGraph()->GetGraphUnknownFlag();
// if multi subgraph is known, do hybrid load process
if (is_shape_unknown || GetContext().GetHostExecFlag() || (name_to_model.size() > 1)) {
return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener);
}
@ -324,7 +323,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
auto root_graph = ge_root_model->GetRootGraph();
GE_CHECK_NOTNULL(root_graph);
string root_model_name = root_graph->GetName();
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr ge_model = name_to_model[root_model_name];
Status ret = SUCCESS;
do {

@ -17,9 +17,15 @@
#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/debug/ge_attr_define.h"
namespace ge {
constexpr uint8_t kGotoBranchMax = 1;
LabelGotoExTaskInfo::~LabelGotoExTaskInfo() {
args_ = nullptr;
GE_FREE_RT_LOG(index_value_);
}
Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("LabelGotoExTaskInfo Init Start.");
GE_CHECK_NOTNULL(davinci_model);
@ -28,7 +34,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
return FAILED;
}
// Get LabelGoto task def
// Get LabelGotoEx task def
const domi::LabelGotoExDef &label_goto = task_def.label_goto_ex();
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_goto.op_index());
if (op_desc == nullptr) {
@ -43,20 +49,38 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
return INTERNAL_ERROR;
}
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
if (label_index >= label_list.size()) {
GELOGE(PARAM_INVALID, "LabelGotoExTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size());
return INTERNAL_ERROR;
rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
GELOGI("memory_type: %u", memory_type);
GE_CHK_STATUS_RET_NOLOG(davinci_model->GetLabelGotoAddr(label_index, memory_type, args_, args_size_));
rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
label_ = label_list[label_index];
GELOGI("LabelGotoExTaskInfo Init Success, label id:%u, label:%p.", label_index, label_);
uint64_t branch_index = 0;
rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMemcpy failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGI("LabelGotoExTaskInfo Init Success, label id:%u", label_index);
return SUCCESS;
}
Status LabelGotoExTaskInfo::Distribute() {
GELOGI("LabelGotoExTaskInfo Distribute Start.");
rtError_t rt_ret = rtLabelGotoEx(label_, stream_);
GE_CHECK_NOTNULL(args_);
GE_CHECK_NOTNULL(index_value_);
if (args_size_ == 0) {
GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", kGotoBranchMax, args_size_);
return PARAM_INVALID;
}
rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, kGotoBranchMax, args_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);

@ -14,24 +14,26 @@
* limitations under the License.
*/
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#include "graph/load/model_manager/task_info/task_info.h"
namespace ge {
class LabelGotoExTaskInfo : public TaskInfo {
public:
LabelGotoExTaskInfo() : label_(nullptr) {}
LabelGotoExTaskInfo() = default;
~LabelGotoExTaskInfo() override { label_ = nullptr; }
~LabelGotoExTaskInfo() override;
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;
Status Distribute() override;
private:
void *label_;
void *index_value_{nullptr}; // switch index input.
void *args_{nullptr}; // label info memory.
uint32_t args_size_{0}; // label info length.
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#include "graph/load/model_manager/task_info/task_info.h"
@ -34,4 +34,4 @@ class LabelSetTaskInfo : public TaskInfo {
void *label_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_

@ -16,20 +16,13 @@
#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/model_manager/davinci_model.h"
namespace ge {
constexpr uint8_t kLabelSwitchIndexNum = 1;
LabelSwitchByIndexTaskInfo::~LabelSwitchByIndexTaskInfo() {
if (args_ != nullptr) {
rtError_t ret = rtFree(args_);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret);
}
}
args_ = nullptr;
GE_FREE_RT_LOG(args_);
index_value_ = nullptr;
}
@ -37,13 +30,12 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
GELOGI("LabelSwitchByIndexTaskInfo Init Start.");
GE_CHECK_NOTNULL(davinci_model);
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList());
if (ret != SUCCESS) {
return FAILED;
}
// Get LabelSwitch task def
// Get LabelSwitchByIndex task def
const domi::LabelSwitchByIndexDef &label_switch = task_def.label_switch_by_index();
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_switch.op_index());
if (op_desc == nullptr) {
@ -68,7 +60,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
davinci_model->DisableZeroCopy(index_value_);
std::vector<uint32_t> label_idx_list;
vector<uint32_t> label_idx_list;
if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, label_idx_list)) {
GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s Get attr %s failed.", op_desc->GetName().c_str(),
ATTR_NAME_LABEL_SWITCH_LIST.c_str());
@ -81,7 +73,8 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
return INTERNAL_ERROR;
}
label_list_.resize(branch_max_, nullptr);
vector<rtLabel_t> label_used(branch_max_, nullptr);
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
for (size_t idx = 0; idx < label_idx_list.size(); ++idx) {
uint32_t label_id = label_idx_list[idx];
if (label_id >= label_list.size()) {
@ -90,8 +83,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL(label_list[label_id]);
label_list_[idx] = label_list[label_id];
label_used[idx] = label_list[label_id];
}
rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
@ -103,7 +95,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_);
rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@ -125,7 +117,7 @@ Status LabelSwitchByIndexTaskInfo::Distribute() {
rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, branch_max_, args_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGI("LabelSwitchByIndexTaskInfo Distribute Success.");

@ -14,16 +14,15 @@
* limitations under the License.
*/
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#include "graph/load/model_manager/task_info/task_info.h"
namespace ge {
class LabelSwitchByIndexTaskInfo : public TaskInfo {
public:
LabelSwitchByIndexTaskInfo()
: index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0), fixed_addr_offset_(0) {}
LabelSwitchByIndexTaskInfo() = default;
~LabelSwitchByIndexTaskInfo() override;
@ -34,12 +33,11 @@ class LabelSwitchByIndexTaskInfo : public TaskInfo {
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;
private:
void *index_value_; // switch index input.
uint32_t branch_max_; // max branch count.
void *args_; // label info memory.
uint32_t args_size_; // label info length.
std::vector<rtLabel_t> label_list_;
int64_t fixed_addr_offset_;
void *index_value_{nullptr}; // switch index input.
uint32_t branch_max_{0}; // max branch count.
void *args_{nullptr}; // label info memory.
uint32_t args_size_{0}; // label info length.
int64_t fixed_addr_offset_{0};
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_

@ -40,7 +40,7 @@ static bool BlockComparator(const Block *left, const Block *right) {
}
bool CanMerge(Block *block) {
if (block == nullptr || block->allocated || !block->IsSplit()) {
if ((block == nullptr) || block->allocated || !block->IsSplit()) {
return false;
}
return true;
@ -52,7 +52,7 @@ size_t GetBinIndex(size_t size) {
if (size <= range) {
break;
}
++index;
index++;
}
if (index > kNumBins - 1) {
index = kNumBins - 1;
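
Aside: GetBinIndex maps a request size onto doubling size-class ranges and clamps to the last bin; a runnable version under assumed constants (kNumBins and kBinSizeUnit here are placeholders):

#include <cstddef>
#include <iostream>

// Bins cover doubling ranges and the last bin absorbs everything larger.
constexpr std::size_t kNumBins = 8;
constexpr std::size_t kBinSizeUnit = 512;

std::size_t GetBinIndex(std::size_t size) {
  std::size_t index = 0;
  for (std::size_t range = kBinSizeUnit; index < kNumBins; range *= 2) {
    if (size <= range) break;
    index++;
  }
  if (index > kNumBins - 1) index = kNumBins - 1;  // clamp oversized requests
  return index;
}

int main() {
  std::cout << GetBinIndex(512) << " " << GetBinIndex(513) << " "
            << GetBinIndex(1u << 30) << "\n";  // 0 1 7
}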
@ -87,15 +87,15 @@ bool ShouldSplit(const Block *block, size_t size) {
void IncreaseCount(std::map<size_t, size_t> &count, size_t size) {
auto it = count.find(size);
if (it != count.end()) {
it->second++;
} else {
if (it == count.end()) {
count.emplace(size, 1);
} else {
it->second++;
}
}
CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) {
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
free_block_bins_[i] = nullptr;
}
}
@ -105,7 +105,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) {
// when redo Initialize free old memory
FreeBlocks();
std::lock_guard<std::recursive_mutex> lock(mutex_);
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
if (free_block_bins_[i] != nullptr) {
continue;
}
@ -132,18 +132,18 @@ void CachingAllocator::Finalize(uint32_t device_id) {
uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) {
GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id);
uint8_t *ptr = nullptr;
size = GetBlockSize(size);
uint8_t *ptr = nullptr;
Block *block = FindFreeBlock(size, org_ptr, device_id);
if (block != nullptr) {
ptr = block->ptr;
} else {
if (block == nullptr) {
if (ge::SUCCESS == TryExtendCache(size, device_id)) {
block = FindFreeBlock(size, org_ptr, device_id);
if (block != nullptr) {
ptr = block->ptr;
}
}
} else {
ptr = block->ptr;
}
if (ptr == nullptr) {
GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size);
@ -171,7 +171,7 @@ Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) {
}
void CachingAllocator::FreeBlock(Block *block) {
if (block == nullptr || !block->allocated) {
if ((block == nullptr) || !block->allocated) {
return;
}
GELOGI("Free block size = %zu", block->size);
@ -187,7 +187,7 @@ void CachingAllocator::FreeBlock(Block *block) {
}
void CachingAllocator::MergeBlocks(Block *dst, Block *src, BlockBin &bin) {
if (!CanMerge(dst) || !CanMerge(src)) {
if (!CanMerge(src) || !CanMerge(dst)) {
return;
}
@ -316,7 +316,7 @@ size_t CachingAllocator::FreeCachedBlocks() {
GELOGI("Free cached blocks");
std::lock_guard<std::recursive_mutex> lock(mutex_);
size_t free_cached_memory_size = 0;
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
auto pool = free_block_bins_[i];
if (pool == nullptr) {
continue;
@ -324,7 +324,8 @@ size_t CachingAllocator::FreeCachedBlocks() {
for (auto it = pool->begin(); it != pool->end();) {
Block *block = *it;
// free block memory that has not been split
if ((block != nullptr) && (block->ptr != nullptr) && (block->prev == nullptr) && (block->next == nullptr) &&
if ((block != nullptr) && (block->ptr != nullptr) &&
(block->prev == nullptr) && (block->next == nullptr) &&
(memory_allocator_->FreeMemory(block->ptr) == ge::SUCCESS)) {
auto itcount = malloced_memory_.find(block->size);
free_cached_memory_size += block->size;
@ -345,7 +346,7 @@ size_t CachingAllocator::FreeCachedBlocks() {
}
void CachingAllocator::FreeBlocks() {
GELOGI("Free blocks");
GELOGI("Free blocks.");
std::lock_guard<std::recursive_mutex> lock(mutex_);
// free allocated blocks and put to cache
for (auto &it : allocated_blocks_) {
@ -356,9 +357,9 @@ void CachingAllocator::FreeBlocks() {
}
void CachingAllocator::FreeBlockBins() {
GELOGI("Free block bins");
GELOGI("Free block bins.");
std::lock_guard<std::recursive_mutex> lock(mutex_);
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
if (free_block_bins_[i] != nullptr) {
delete free_block_bins_[i];
free_block_bins_[i] = nullptr;
@ -367,9 +368,9 @@ void CachingAllocator::FreeBlockBins() {
}
void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) {
GELOGI("%6s total[size:%10zu count:%10zu]", name.c_str(), total_size, total_count);
GELOGI("%6s total[size:%10zu count:%10zu].", name.c_str(), total_size, total_count);
for (auto &it : count) {
GELOGI(" |- block[size:%10zu count:%10zu]", it.first, it.second);
GELOGI(" |- block[size:%10zu count:%10zu].", it.first, it.second);
}
}
@ -383,20 +384,20 @@ void CachingAllocator::PrintStatics() {
size_t total_free_count = 0;
size_t total_malloc_size = 0;
size_t total_malloc_count = 0;
std::map<size_t, size_t> using_block;
std::map<size_t, size_t> free_block;
std::map<size_t, size_t> malloc_block;
std::map<size_t, size_t> using_block_stat;
std::map<size_t, size_t> free_block_stat;
std::map<size_t, size_t> malloc_block_stat;
do {
std::lock_guard<std::recursive_mutex> lock(mutex_);
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
auto pool = free_block_bins_[i];
if (pool == nullptr) {
continue;
}
for (auto it = pool->begin(); it != pool->end(); ++it) {
for (auto it = pool->begin(); it != pool->end(); it++) {
if ((*it) != nullptr) {
total_free_size += (*it)->size;
IncreaseCount(free_block, (*it)->size);
IncreaseCount(free_block_stat, (*it)->size);
total_free_count++;
}
}
@ -405,7 +406,7 @@ void CachingAllocator::PrintStatics() {
for (auto &it : allocated_blocks_) {
if (it.second != nullptr) {
total_using_size += it.second->size;
IncreaseCount(using_block, it.second->size);
IncreaseCount(using_block_stat, it.second->size);
total_using_count++;
}
}
@ -413,12 +414,12 @@ void CachingAllocator::PrintStatics() {
for (auto &it : malloced_memory_) {
total_malloc_size += it.first * it.second;
total_malloc_count += it.second;
malloc_block[it.first] = it.second;
malloc_block_stat[it.first] = it.second;
}
} while (0);
PrintCount(malloc_block, "Malloc", total_malloc_size, total_malloc_count);
PrintCount(using_block, "Using", total_using_size, total_using_count);
PrintCount(free_block, "Free", total_free_size, total_free_count);
PrintCount(malloc_block_stat, "Malloc", total_malloc_size, total_malloc_count);
PrintCount(using_block_stat, "Using", total_using_size, total_using_count);
PrintCount(free_block_stat, "Free", total_free_size, total_free_count);
}
} // namespace ge

@ -359,7 +359,10 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
std::shared_ptr<Graph> graph_ptr = MakeShared<ge::Graph>(graph);
GE_IF_BOOL_EXEC(graph_ptr == nullptr, GELOGE(FAILED, "GraphPtr make shared failed");
return FAILED);
// update option about tuning graph
ParseOption(options, BUILD_MODE, options_.build_mode);
ParseOption(options, BUILD_STEP, options_.build_step);
ParseOption(options, TUNING_PATH, options_.tuning_path);
graph_node->SetGraph(graph_ptr);
graph_node->SetOptions(options);
AddGraphNode(graph_id, graph_node);
@ -433,6 +436,10 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap
GELOGE(FAILED, "GraphPtr make shared failed");
return FAILED;
}
// update option about tuning graph
ParseOption(options, BUILD_MODE, options_.build_mode);
ParseOption(options, BUILD_STEP, options_.build_step);
ParseOption(options, TUNING_PATH, options_.tuning_path);
graph_node->SetGraph(graph_ptr);
graph_node->SetOptions(options);
@ -1466,6 +1473,10 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
GE_IF_BOOL_EXEC(ret != SUCCESS,
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.compressFlag value is invalid, must be 0 or 1.");
return GE_GRAPH_OPTIONS_INVALID);
// Set Build model and step
ParseOption(options, BUILD_MODE, options_.build_mode);
ParseOption(options, BUILD_STEP, options_.build_step);
ParseOption(options, TUNING_PATH, options_.tuning_path);
// ge.graphType.
options_.run_graph_flag = true;
@ -1514,10 +1525,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d",
options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type);
// Set Build model and step
ParseOption(options, BUILD_MODE, options_.build_mode);
ParseOption(options, BUILD_STEP, options_.build_step);
return SUCCESS;
}
@ -1549,6 +1556,7 @@ void GraphManager::ParseOption(const std::map<std::string, std::string> &options
std::string &option) {
auto iter = options.find(key);
if (iter != options.end()) {
GELOGD("Set option %s from value %s to value%s", key.c_str(), option.c_str(), iter->second.c_str());
option = iter->second;
}
}
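
Aside: ParseOption deliberately leaves the target untouched when the key is absent, so options set by an earlier call survive later maps that omit them. In isolation (option keys below are illustrative):

#include <iostream>
#include <map>
#include <string>

// Overwrite only when the key is present; an absent key keeps the value
// configured earlier.
void ParseOption(const std::map<std::string, std::string> &options,
                 const std::string &key, std::string &option) {
  auto iter = options.find(key);
  if (iter != options.end()) {
    option = iter->second;
  }
}

int main() {
  std::map<std::string, std::string> options = {{"ge.buildMode", "tuning"}};
  std::string build_mode = "normal";
  std::string build_step;
  ParseOption(options, "ge.buildMode", build_mode);  // overwritten: "tuning"
  ParseOption(options, "ge.buildStep", build_step);  // key absent: stays ""
  std::cout << build_mode << " [" << build_step << "]\n";
}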
@ -3132,6 +3140,21 @@ Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPar
non_tuning_subgraphs.push_back(sub_graph_tmp);
}
}
// for function graphs to tune
for (auto &function_graph : compute_graph->GetAllSubgraphs()) {
auto subgraph_list = sub_graph_map[function_graph];
for (const auto &sub_graph_info_ptr : subgraph_list) {
GE_CHECK_NOTNULL(sub_graph_info_ptr);
ComputeGraphPtr sub_graph_tmp = sub_graph_info_ptr->GetSubGraph();
// need to tuning
if (sub_graph_info_ptr->GetEngineName() == kVectorEngine ||
sub_graph_info_ptr->GetEngineName() == kAIcoreEngine) {
tuning_subgraphs.push_back(sub_graph_tmp);
} else {
non_tuning_subgraphs.push_back(sub_graph_tmp);
}
}
}
return TuningUtils::ConvertGraphToFile(tuning_subgraphs, non_tuning_subgraphs, exe_flag, path);
}
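
Aside: the added loop applies the same engine-based split to function subgraphs that the preceding loop applies to root-graph subgraphs: AI Core and vector engine subgraphs go to the tuning list, everything else does not. The predicate on its own (struct and ids are illustrative):

#include <iostream>
#include <string>
#include <vector>

// Subgraphs on the AI Core or vector engine are tuned; the rest are not.
struct SubGraphInfo {
  std::string engine_name;
  int id;
};

void SplitForTuning(const std::vector<SubGraphInfo> &subgraphs,
                    std::vector<int> &tuning, std::vector<int> &non_tuning) {
  for (const auto &sg : subgraphs) {
    if (sg.engine_name == "AIcoreEngine" || sg.engine_name == "VectorEngine") {
      tuning.push_back(sg.id);
    } else {
      non_tuning.push_back(sg.id);
    }
  }
}

int main() {
  std::vector<SubGraphInfo> subgraphs = {{"AIcoreEngine", 0}, {"DNN_VM_GE_LOCAL", 1}};
  std::vector<int> tuning, non_tuning;
  SplitForTuning(subgraphs, tuning, non_tuning);
  std::cout << tuning.size() << " tuning, " << non_tuning.size() << " non-tuning\n";
}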

Some files were not shown because too many files have changed in this diff
