!561 Dynamic multi batch memory optimization

From: @tangqunzhang
Reviewed-by: @xchu42,@wqtshg
Signed-off-by: @ji_chen
pull/561/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 5043ea87bb

@ -22,7 +22,7 @@ namespace {
const uint32_t kRangeCeilInterval = 2;
const uint32_t kLogBase = 2;
const int64_t kLargeBlockSize = 8 * 1024 * 1024;
const int64_t kLargeBlockRangeSize = 10;
const int64_t kLargeBlockRangeSize = 2;
} // namespace
namespace ge {
@ -73,15 +73,17 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
GELOGE(FAILED, "dividend is 0!");
return FAILED;
}
// Memory size is 512 aligned, so it is not necessary to take less than 512
int64_t min_memory_size = (all_memory_size.back() > MEM_ALIGN_SIZE) ? MEM_ALIGN_SIZE : all_memory_size.front();
auto range_number = static_cast<size_t>(
ceil(log(all_memory_size.back() / static_cast<double>(all_memory_size.front())) / log(kLogBase)));
ceil(log(all_memory_size.back() / static_cast<double>(min_memory_size)) / log(kLogBase)));
range_number = (range_number == 0) ? 1 : range_number;
GELOGD("Range number: %zu", range_number);
vector<vector<int64_t>> ranges(range_number);
GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0.");
size_t range_number_limit = all_memory_size.size() / range_number;
int64_t range_ceil = all_memory_size[0];
int64_t range_ceil = min_memory_size;
for (size_t i = 1; i <= range_number; i++) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval),
GELOGE(FAILED, "Multiply result is out of range.");
@ -114,7 +116,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
range_ceils.push_back(range.back());
}
}
GELOGD("Range ceils: %s", ToString(range_ceils).c_str());
GELOGI("Range ceils: %s", ToString(range_ceils).c_str());
return SUCCESS;
}

File diff suppressed because it is too large Load Diff

@ -65,6 +65,7 @@ class MemoryBlock {
stream_id_(stream_id),
deleted_block_(false),
reuse_mem_(reuse_mem),
same_stream_(true),
input_index_(0),
continuous_block_(false),
first_continuous_block_(false),
@ -85,10 +86,14 @@ class MemoryBlock {
symbol_list_.clear();
}
void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size) {
void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size,
int64_t stream_id) {
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
node_type_index_list_.emplace_back(node, type, out_index, false);
if (stream_id != stream_id_) {
same_stream_ = false;
}
}
size_t Size() const { return block_size_; }
@ -106,6 +111,12 @@ class MemoryBlock {
node_type_index_list_.emplace_back(node_type_index);
real_size_list_.emplace_back(real_size);
no_align_size_list_.emplace_back(no_align_size);
if ((node_type_index.node != nullptr) && (node_type_index.node->GetOpDesc() != nullptr)) {
auto stream_id = node_type_index.node->GetOpDesc()->GetStreamId();
if (stream_id != stream_id_) {
same_stream_ = false;
}
}
}
void AddSymbol(const std::string &symbol) {
@ -122,7 +133,7 @@ class MemoryBlock {
std::string String();
bool IsSameLabel(std::string &first_batch_label);
bool IsSameBatchLabel();
void AddContinuousLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_node_depend_stream_life);
@ -142,6 +153,7 @@ class MemoryBlock {
int64_t stream_id_;
bool deleted_block_;
bool reuse_mem_;
bool same_stream_;
uint32_t input_index_;
bool continuous_block_;
bool first_continuous_block_;
@ -149,6 +161,7 @@ class MemoryBlock {
bool is_zero_copy_;
std::map<int64_t, size_t> depend_stream_life_;
int64_t memory_type_;
std::string batch_label_;
private:
size_t block_size_;
std::vector<size_t> real_size_list_;
@ -209,7 +222,7 @@ class BlockMemAssigner : public MemAssigner {
void GetOutAndWorkSpaceMem(std::vector<int64_t> &all_memory_size);
void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory);
void GetNodeWorkSpaceSize(const ge::NodePtr &node, std::vector<int64_t> &workspace_memory, int64_t &total_size);
///
/// @ingroup GE
@ -353,7 +366,7 @@ class BlockMemAssigner : public MemAssigner {
/// @return void
/// @author
///
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory);
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory, bool same_stream = true);
///
/// @ingroup GE
@ -379,11 +392,11 @@ class BlockMemAssigner : public MemAssigner {
///
/// @ingroup GE
/// @brief Merge memory blocks between different batchs
/// @brief Resize memory blocks for each batchs
/// @return merge or not
/// @author
///
bool MergeDynamicBatchBlocks();
void ResizeDynamicBatchBlocks();
void AssignContinuousBlocks();
@ -436,6 +449,17 @@ class BlockMemAssigner : public MemAssigner {
int64_t atomic_addr_clean_id_ = 0;
size_t theory_min_memory_size_ = 0;
size_t theory_memory_size_ = 0;
std::string max_batch_label_;
///
/// @ [stream1][nodeid]
/// @[nodeid] [stream2][nodeid]
/// @ [stream2][nodeid]
///
DependStreamLife total_node_depend_stream_life_;
};
} // namespace ge

@ -1646,9 +1646,9 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve
}
string atomic_mem_size_str = ss.str();
GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]",
node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str());
}
return SUCCESS;
}

@ -2202,7 +2202,7 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data
void *mem_addr = data.second.GetBasicAddr();
void *data_buf_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(data_buf.data));
uint64_t data_buf_length = data_buf.length;
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]",
GELOGI("CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]",
runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length);
GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind));
}

@ -61,7 +61,7 @@ vector<int64_t> ModelUtils::GetInputSize(ConstOpDescPtr op_desc) {
GELOGI("Get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i);
continue);
GELOGI("[IMAS]GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
GELOGI("GetInputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
v_input_size.push_back(tensor_size);
}
@ -96,7 +96,7 @@ vector<int64_t> ModelUtils::GetOutputSize(ConstOpDescPtr op_desc) {
GELOGI("Get size from TensorDesc failed, op : %s, output index : %zu", op_desc->GetName().c_str(), i);
continue);
GELOGI("[IMAS]GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
GELOGI("GetOutputSize op: %s, index: %zu, size:%ld", op_desc->GetName().c_str(), i, tensor_size);
v_output_size.push_back(tensor_size);
}

Loading…
Cancel
Save