!7520 Critical path performance optimization

Merge pull request !7520 from chenweifeng/resnet50-profiling
pull/7520/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 89cd882077

File diff suppressed because it is too large Load Diff

@@ -263,6 +263,11 @@ void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::v
 }
 void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) {
+if (is_alloc_inplace_res_[graph->graph_id()]) {
+return;
+}
+is_alloc_inplace_res_[graph->graph_id()] = true;
 std::map<uint32_t, std::vector<CNodePtr>> inplace_groups;
 auto kernel_cnodes = graph->execution_order();
 for (auto &kernel : kernel_cnodes) {
@@ -901,6 +906,11 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K
 }
 void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph *graph) {
+if (is_alloc_communication_res_[graph->graph_id()]) {
+return;
+}
+is_alloc_communication_res_[graph->graph_id()] = true;
 MS_EXCEPTION_IF_NULL(graph);
 auto &kernels = graph->execution_order();
 for (auto &kernel : kernels) {
@@ -1011,6 +1021,11 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel)
 }
 }
+auto kernel_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
+if (AnfAlgo::IsCommunicationOp(kernel_with_index.first)) {
+continue;
+}
 auto kernel_ref_count_ptr = mem_reuse_util_->GetKernelInputRef(cnode, i);
 if (kernel_ref_count_ptr == nullptr) {
 continue;

@@ -100,6 +100,8 @@ class GPUKernelRuntime : public KernelRuntime {
 std::unordered_map<uint32_t, MemSwapManagerPtr> mem_swap_map_;
 std::unordered_map<uint32_t, bool> is_first_step_map_;
 std::unordered_map<uint32_t, std::set<AnfNodePtr>> graph_output_map_;
+std::unordered_map<uint32_t, bool> is_alloc_communication_res_;
+std::unordered_map<uint32_t, bool> is_alloc_inplace_res_;
 MemReuseUtilPtr mem_reuse_util_{nullptr};
 MemSwapManagerPtr mem_swap_manager_{nullptr};

Loading…
Cancel
Save