Fix Unit Test: Add Sleep Time for CUDA Retry (#29442)

Add a sleep between CUDA retries, similar to our existing GPU retry logic. This is an attempt to avoid random failures of the initial GPU allocation in unit tests.
revert-31562-mean
Huihuang Zheng 4 years ago committed by GitHub
parent e5e522493d
commit a1909affc6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -19,12 +19,13 @@ limitations under the License. */
#endif // __GNUC__
#if !defined(_WIN32)
#include <dlfcn.h> // dladdr
#else // _WIN32
#include <dlfcn.h> // dladdr
#include <unistd.h> // sleep
#else // _WIN32
#ifndef NOMINMAX
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#endif
#include <windows.h> // GetModuleFileName
#include <windows.h> // GetModuleFileName, Sleep
#endif
#ifdef PADDLE_WITH_CUDA
@ -80,6 +81,9 @@ class ErrorSummary;
} // namespace platform
} // namespace paddle
#ifdef PADDLE_WITH_CUDA
DECLARE_int64(gpu_allocator_retry_time);
#endif
DECLARE_int32(call_stack_level);
namespace paddle {
@ -924,6 +928,14 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
} \
} while (0)
// Suspends the calling thread for roughly `millisecond` milliseconds.
//
// Windows Sleep() takes milliseconds, but POSIX sleep() takes whole seconds,
// so handing the value straight to sleep() would stall 1000x longer than
// requested. On POSIX the delay is therefore split into whole seconds for
// sleep() plus a sub-second remainder for usleep(), whose argument must stay
// below 1,000,000 microseconds.
inline void retry_sleep(unsigned millisecond) {
#ifdef _WIN32
  Sleep(millisecond);
#else
  const unsigned whole_seconds = millisecond / 1000;
  const unsigned remainder_us = (millisecond % 1000) * 1000;
  if (whole_seconds > 0) {
    sleep(whole_seconds);
  }
  if (remainder_us > 0) {
    usleep(remainder_us);
  }
#endif
}
#define PADDLE_RETRY_CUDA_SUCCESS(COND) \
do { \
auto __cond__ = (COND); \
@ -933,6 +945,7 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
::paddle::platform::details::CudaStatusType< \
__CUDA_STATUS_TYPE__>::kSuccess; \
while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \
retry_sleep(FLAGS_gpu_allocator_retry_time); \
__cond__ = (COND); \
++retry_count; \
} \

Loading…
Cancel
Save