refine error message of gpu allocator, test=develop (#21008)

custom_op_abi
Zeng Jinle 6 years ago committed by GitHub
parent bc8e600ce5
commit f56967c483
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -105,6 +105,21 @@ bool CPUAllocator::UseGpu() const { return false; }
#ifdef PADDLE_WITH_CUDA
/**
 * Downgrades CUDA out-of-memory codes to cudaSuccess while still enforcing
 * that no other CUDA error is pending.
 *
 * The same check runs twice: first on the code the caller passes in through
 * `status`, then on the code reported by cudaGetLastError(). In each round a
 * cudaErrorMemoryAllocation value is overwritten with cudaSuccess, and the
 * resulting value is handed to PADDLE_ENFORCE_CUDA_SUCCESS.
 *
 * @param status In/out error code. On normal return it holds the value left
 *               by the second round.
 *
 * NOTE(review): PADDLE_ENFORCE_CUDA_SUCCESS presumably raises on any value
 * other than cudaSuccess, and cudaGetLastError() presumably also resets the
 * runtime's recorded error -- confirm against their definitions/docs.
 */
static void ClearCUDAOutOfMemoryError(cudaError_t* status) {
  for (int round = 0; round < 2; ++round) {
    // Round 0 examines the caller-supplied code as-is; round 1 replaces it
    // with whatever the CUDA runtime reports as its last error.
    if (round == 1) {
      *status = cudaGetLastError();
    }
    const bool out_of_memory = (*status == cudaErrorMemoryAllocation);
    if (out_of_memory) {
      *status = cudaSuccess;
    }
    PADDLE_ENFORCE_CUDA_SUCCESS(*status);
  }
}
void* GPUAllocator::Alloc(size_t* index, size_t size) {
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
// if size is 0. We just make sure it does.
@ -120,19 +135,19 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
gpu_alloc_size_ += size;
return p;
} else {
if (result == cudaErrorMemoryAllocation) {
result = cudaSuccess;
}
PADDLE_ENFORCE_CUDA_SUCCESS(result);
result = cudaGetLastError();
if (result == cudaErrorMemoryAllocation) {
result = cudaSuccess;
}
PADDLE_ENFORCE_CUDA_SUCCESS(result);
size_t avail, total;
platform::GpuMemoryUsage(&avail, &total);
ClearCUDAOutOfMemoryError(&result);
/**
* NOTE(zjl): Sometimes cudaMemGetInfo would raise OOM error
* if there is very little GPU memory left. In this case, we
* should consider the available GPU memory to be 0, and throw
* exception inside this function instead of throwing exception
* inside cudaMemGetInfo.
*/
size_t avail = 0, total = 0;
result = cudaMemGetInfo(&avail, &total);
if (result != cudaSuccess) avail = 0;
ClearCUDAOutOfMemoryError(&result);
PADDLE_THROW_BAD_ALLOC(
"\n\nOut of memory error on GPU %d. "

Loading…
Cancel
Save