!7520 Critical path performance optimization

Merge pull request !7520 from chenweifeng/resnet50-profiling
pull/7520/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 89cd882077

File diff suppressed because it is too large Load Diff

@@ -263,6 +263,11 @@ void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::v
}
void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) {
if (is_alloc_inplace_res_[graph->graph_id()]) {
return;
}
is_alloc_inplace_res_[graph->graph_id()] = true;
std::map<uint32_t, std::vector<CNodePtr>> inplace_groups;
auto kernel_cnodes = graph->execution_order();
for (auto &kernel : kernel_cnodes) {
@@ -901,6 +906,11 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K
}
void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph *graph) {
if (is_alloc_communication_res_[graph->graph_id()]) {
return;
}
is_alloc_communication_res_[graph->graph_id()] = true;
MS_EXCEPTION_IF_NULL(graph);
auto &kernels = graph->execution_order();
for (auto &kernel : kernels) {
@@ -1011,6 +1021,11 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel)
}
}
auto kernel_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
if (AnfAlgo::IsCommunicationOp(kernel_with_index.first)) {
continue;
}
auto kernel_ref_count_ptr = mem_reuse_util_->GetKernelInputRef(cnode, i);
if (kernel_ref_count_ptr == nullptr) {
continue;

@@ -100,6 +100,8 @@ class GPUKernelRuntime : public KernelRuntime {
std::unordered_map<uint32_t, MemSwapManagerPtr> mem_swap_map_;
std::unordered_map<uint32_t, bool> is_first_step_map_;
std::unordered_map<uint32_t, std::set<AnfNodePtr>> graph_output_map_;
// Keyed by graph id. AllocCommunicationOpDynamicRes sets the entry to true the
// first time it runs for a graph and returns immediately on later calls for the
// same graph id. A missing key reads as false because the lookup uses
// operator[], which value-initializes the bool.
std::unordered_map<uint32_t, bool> is_alloc_communication_res_;
// Keyed by graph id. AllocInplaceNodeMemory sets the entry to true the first
// time it runs for a graph and returns immediately on later calls for the same
// graph id. A missing key reads as false (operator[] value-initializes).
std::unordered_map<uint32_t, bool> is_alloc_inplace_res_;
MemReuseUtilPtr mem_reuse_util_{nullptr};
MemSwapManagerPtr mem_swap_manager_{nullptr};

Loading…
Cancel
Save