Fix random hang of PaddleDetection training tasks on Windows (#24977)

revert-24981-add_device_attr_for_regulization
liuwei1031 5 years ago committed by GitHub
parent 640196c446
commit 8603b5fb72
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -32,6 +32,18 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
#ifdef PADDLE_WITH_CUDA
// Byte-count threshold (64 * 1024). The Copy specializations in this file
// compare the copy size `num` against it: after GpuMemcpySync, stream 0 is
// additionally synchronized only when num <= kMaxGpuAsyncCopyBytes.
static constexpr size_t kMaxGpuAsyncCopyBytes = 64 * 1024; // 64K
// Waits on stream 0 from the host.
// Non-Windows builds call cudaStreamSynchronize(0). Windows builds instead
// poll cudaStreamQuery(0) and keep spinning for as long as it reports
// cudaErrorNotReady. In both variants the resulting status (success or any
// other error code) is discarded and not reported to the caller.
inline void SyncCUDAStream() {
#if defined(_WIN32)
  // Busy-poll: the loop ends on cudaSuccess or on any error other than
  // cudaErrorNotReady.
  cudaError_t status;
  do {
    status = cudaStreamQuery(0);
  } while (status == cudaErrorNotReady);
#else
  cudaStreamSynchronize(0);
#endif
}
// NOTE(zcd): Avoid GpuMemcpySync as much as possible,
// because GpuMemcpySync issues the copy command to the default stream,
// which prevents commands from different streams from running concurrently.
@ -55,7 +67,7 @@ void Copy<platform::CPUPlace, platform::CUDAPlace>(
platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost);
// FIXME(zjl): do we really need it?
if (num <= kMaxGpuAsyncCopyBytes) {
cudaStreamSynchronize(0);
SyncCUDAStream();
}
}
}
@ -77,7 +89,7 @@ void Copy<platform::CUDAPlace, platform::CPUPlace>(
platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice);
// FIXME(zjl): do we really need it?
if (num <= kMaxGpuAsyncCopyBytes) {
cudaStreamSynchronize(0);
SyncCUDAStream();
}
}
}

Loading…
Cancel
Save