@@ -1121,7 +1121,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
         }
       }
       reusable_block->continuous_block_ = continuous;
-      reusable_block->ref_count_++;
       reusable_blocks_[memory_type][stream_id].erase((++it).base());
       return reusable_block;
     }
@@ -1136,7 +1135,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
   block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);
   block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size);
   block->stream_id_ = node_op_desc->GetStreamId();
-  block->ref_count_++;
   block->continuous_block_ = continuous;
   block->batch_label_ = batch_label;
   if (mem_type == kOutput) {
@@ -1266,6 +1264,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
     // hccl task need align header and tail
     block->first_continuous_block_ = true;
     block->last_continuous_block_ = true;
+    ++(block->ref_count_);
   } else {
     GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str());
     return INTERNAL_ERROR;
@@ -1289,6 +1288,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
                                  return nullptr, "Get no align size failed");
 
   std::string symbol;
+  bool reuse_input = false;
   if (IsSymbolExist(node_index_io, symbol)) {
     block = symbol_blocks_[symbol];
     GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str());
@@ -1303,6 +1303,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
     block->SetLifeTimeEnd(life_time_);
     block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size);
     block->ref_count_++;
+    reuse_input = true;
 
     // add new size
     align_size = block_size;
@@ -1336,7 +1337,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
                         workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type);
   }
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr.");
-  int out_count_reuse_input = block->ref_count_;
   int out_count = 0;
   GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr);
   auto out_data_anchor = n->GetOutDataAnchor(index);
@@ -1351,28 +1351,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
       out_count++;
     }
   }
-  bool reuse_input = false;
-  for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
-    auto owner_node = in_anchor->GetOwnerNode();
-    GE_IF_BOOL_EXEC(owner_node == nullptr, continue);
-    auto op_desc = owner_node->GetOpDesc();
-    GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
-    for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) {
-      bool dst_reuse_input = false;
-      uint32_t dst_reuse_input_index = 0;
-      auto owner_node_op_desc = op_desc->GetOutputDescPtr(i);
-      GE_IF_BOOL_EXEC(owner_node_op_desc == nullptr, continue);
-      GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(*owner_node_op_desc, dst_reuse_input) != SUCCESS,
-                      GELOGI("Get dst_reuse_input failed"));
-      GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS,
-                      GELOGI("Get dst_reuse_input_index failed"));
-      if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) {
-        out_count_reuse_input += 1;
-        reuse_input = true;
-      }
-    }
-  }
-  block->ref_count_ = reuse_input ? out_count_reuse_input + out_count - 1 : out_count;
+  block->ref_count_ = (reuse_input && out_count != 0) ? (block->ref_count_ + out_count - 1)
+                                                      : (block->ref_count_ + out_count);
 
   return block;
 }
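Reviewer note: this hunk drops the per-peer scan for reuse-input consumers; `reuse_input` is now set where the symbol block is reused (earlier hunk), and the final count builds on the block's existing `ref_count_` instead of overwriting it. A minimal sketch of the before/after arithmetic, using a hypothetical simplified `MemoryBlock` rather than the real GE class:

```cpp
#include <cassert>

// Hypothetical stand-in for the GE MemoryBlock; only the ref count matters here.
struct MemoryBlock {
  int ref_count_ = 0;
};

// Old: the consumer count overwrote whatever the block had already accumulated.
int OldRefCount(int out_count_reuse_input, int out_count, bool reuse_input) {
  return reuse_input ? out_count_reuse_input + out_count - 1 : out_count;
}

// New: consumers are added on top of the existing count, so the increment
// done in the symbol-reuse branch is preserved instead of discarded.
int NewRefCount(int current, int out_count, bool reuse_input) {
  return (reuse_input && out_count != 0) ? (current + out_count - 1)
                                         : (current + out_count);
}

int main() {
  MemoryBlock block;
  block.ref_count_++;  // mirrors block->ref_count_++ in the reuse branch
  block.ref_count_ = NewRefCount(block.ref_count_, 3, true);
  assert(block.ref_count_ == 3);  // 1 existing + 3 consumers - 1 for the reuse
  return 0;
}
```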
@@ -1484,12 +1464,25 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec
       GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index,
              node_type_indexs.back().node->GetName().c_str());
 
-      if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
-          (node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) {
+      bool is_block_matched = false;
+      for (auto &node_type_index : node_type_indexs) {
+        is_block_matched = (node_type_index.node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
+                           (node_type_index.index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()));
+        if (is_block_matched) {
+          GELOGI("Block of peer out is matched. Peer node:%s, output index:%u, "
+                 "current node:%s, input index:%d, block ref_count:%d.",
+                 node_type_index.node->GetName().c_str(), node_type_index.index,
+                 node->GetName().c_str(), in_anchor->GetIdx(), block->ref_count_);
+          break;
+        }
+      }
+
+      if (is_block_matched) {
         ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_));
         if (block->ref_count_ == 0 && block->same_stream_) {
           SetLastUsedInputMemAttr(node, in_anchor->GetIdx());
         }
         break;
       }
     }
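Reviewer note: the old code compared only the last `NodeTypeIndex` recorded on the block, so once a block serves several (node, output) pairs, a release request matching an earlier entry was missed. The new loop checks every entry. A self-contained sketch of the two predicates, with hypothetical simplified types in place of the GE ones:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for GE's NodeTypeIndex: which node/output a block serves.
struct NodeTypeIndex {
  std::string node_name;
  uint32_t index;
};

// Old predicate: only the most recently recorded (node, index) pair counted,
// so a block shared across outputs could miss its release.
bool MatchesBack(const std::vector<NodeTypeIndex> &entries, const std::string &peer, uint32_t idx) {
  return !entries.empty() && entries.back().node_name == peer && entries.back().index == idx;
}

// New predicate: any recorded (node, index) pair can trigger the release.
bool MatchesAny(const std::vector<NodeTypeIndex> &entries, const std::string &peer, uint32_t idx) {
  for (const auto &e : entries) {
    if (e.node_name == peer && e.index == idx) {
      return true;
    }
  }
  return false;
}

int main() {
  std::vector<NodeTypeIndex> entries = {{"conv1", 0}, {"relu1", 0}};
  // "conv1" output 0 is an earlier entry: the old check misses it, the new one matches.
  std::cout << MatchesBack(entries, "conv1", 0) << " " << MatchesAny(entries, "conv1", 0) << "\n";  // 0 1
  return 0;
}
```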
@@ -1530,6 +1523,21 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_
   return;
 }
 
+void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) {
+  if (node == nullptr || node->GetOpDesc() == nullptr || block == nullptr) {
+    return;
+  }
+  int64_t stream_id = node->GetOpDesc()->GetStreamId();
+  auto out_data_anchor = node->GetOutDataAnchor(static_cast<int>(idx));
+  bool is_suspended = (out_data_anchor != nullptr) && (out_data_anchor->GetPeerInDataNodesSize() == 0);
+  if (is_suspended) {
+    block->ref_count_ = (block->ref_count_ != 0) ? (block->ref_count_) : (1);
+    stream_workspace_blocks_[block->memory_type_][stream_id].emplace_back(block);
+    GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, "
+           "size:%zu, ref_count:%d.", node->GetName().c_str(), idx, block->Size(), block->ref_count_);
+  }
+}
+
 Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) {
   auto op_desc = node->GetOpDesc();
   int64_t stream_id = op_desc->GetStreamId();
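Reviewer note: a "suspended" output is one whose anchor has no peer input nodes, so no consumer will ever decrement its ref count. The new helper forces at least one reference and parks the block with the stream's workspace blocks, which the next node's allocation pass releases (see the `ReleaseMemorys` hunk below). A stripped-down sketch of that decision, with hypothetical simplified types:

```cpp
#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

// Hypothetical simplified block: just the fields this path touches.
struct Block {
  int ref_count_ = 0;
  int64_t memory_type_ = 0;
};

// Per memory type, per stream: blocks released before the next node on
// that stream allocates (stand-in for stream_workspace_blocks_).
using WorkspaceBlocks = std::map<int64_t, std::map<int64_t, std::vector<Block *>>>;

// If an output has no downstream consumers, park its block so the normal
// workspace-release path frees it instead of leaking it.
void ParkSuspendedBlock(Block *block, int64_t stream_id, std::size_t peer_count,
                        WorkspaceBlocks &workspace_blocks) {
  if (block == nullptr || peer_count != 0) {
    return;  // a consumer exists; ordinary ref counting releases the block
  }
  if (block->ref_count_ == 0) {
    block->ref_count_ = 1;  // guarantee one pending release actually frees it
  }
  workspace_blocks[block->memory_type_][stream_id].push_back(block);
}
```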
@@ -1560,7 +1568,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
   // Allocate memory for the current node and release node memory of the same size in the workspace
   GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1",
                   for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end();
-                       ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); });
+                       ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]);
+                                 iter->second[stream_id].clear();});
   if (IsContinuousOutput(node)) {
     return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_);
   }
@@ -1621,6 +1630,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
         continue;
       }
       symbol_blocks_[iter->second] = mem_block;
+      // The output is suspended, and will be released in allocation of next node.
+      CheckAndReleaseSuspendedBlock(node, i, mem_block);
     }
   }
   return SUCCESS;
@@ -1648,9 +1659,6 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
     if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) {
       return;
     }
-    for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) {
-      iter->second[stream_id].clear();
-    }
     vector<int64_t> temp;
     int64_t tatal_size = 0;
     GetNodeWorkSpaceSize(n, temp, tatal_size);
@@ -1692,6 +1700,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
                                          kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag,
                                          is_op_reuse_mem_, false, memory_type);
       GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block.");
+      ++(mem_block->ref_count_);
       CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type);
     }
     for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) {
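Reviewer note: taken together, the hunks move the `ref_count_` increment out of `ApplyMemory` and into each call site (the continuous-output path, the symbol-reuse branch, and the workspace loop above), so the allocator no longer assumes how many references a caller creates. A minimal sketch of that calling convention, with hypothetical names:

```cpp
#include <memory>
#include <vector>

// Hypothetical simplified block with the ref count the callers manage.
struct Block {
  int ref_count_ = 0;
};

// Allocator hands out a block without touching ref_count_; the caller
// decides how many references it actually creates.
Block *ApplyBlock(std::vector<std::unique_ptr<Block>> &pool) {
  pool.push_back(std::make_unique<Block>());
  return pool.back().get();
}

int main() {
  std::vector<std::unique_ptr<Block>> pool;
  // Caller-side increment, mirroring "++(mem_block->ref_count_);" placed
  // after the ApplyMemory call in the patched code.
  Block *b = ApplyBlock(pool);
  if (b != nullptr) {
    ++(b->ref_count_);
  }
  return 0;
}
```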