|
|
|
@ -26,7 +26,7 @@
|
|
|
|
|
#include "paddle/fluid/platform/profiler.h"
|
|
|
|
|
#include "paddle/fluid/string/printf.h"
|
|
|
|
|
#include "paddle/fluid/string/split.h"
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
#include "paddle/fluid/platform/cuda_device_guard.h"
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef PADDLE_WITH_XPU
|
|
|
|
@ -216,7 +216,7 @@ size_t Used<platform::XPUPlace>(const platform::XPUPlace &place) {
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
class GPUBuddyAllocatorList {
|
|
|
|
|
private:
|
|
|
|
|
GPUBuddyAllocatorList() : devices_(platform::GetSelectedDevices()) {
|
|
|
|
@ -283,7 +283,7 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
size_t Used<platform::CUDAPlace>(const platform::CUDAPlace &place) {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if (defined PADDLE_WITH_CUDA || defined PADDLE_WITH_HIP)
|
|
|
|
|
return GetGPUBuddyAllocator(place.device)->Used();
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW(platform::errors::PermissionDenied(
|
|
|
|
@ -294,7 +294,7 @@ size_t Used<platform::CUDAPlace>(const platform::CUDAPlace &place) {
|
|
|
|
|
template <>
|
|
|
|
|
void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
|
|
|
|
|
size_t size) {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
auto *buddy_allocator = GetGPUBuddyAllocator(place.device);
|
|
|
|
|
auto *ptr = buddy_allocator->Alloc(size);
|
|
|
|
|
if (ptr == nullptr) {
|
|
|
|
@ -311,7 +311,11 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
|
|
|
|
|
string::HumanReadableSize(Used<platform::CUDAPlace>(place))));
|
|
|
|
|
} else {
|
|
|
|
|
if (FLAGS_init_allocated_mem) {
|
|
|
|
|
#ifdef PADDLE_WITH_HIP
|
|
|
|
|
hipMemset(ptr, 0xEF, size);
|
|
|
|
|
#else
|
|
|
|
|
cudaMemset(ptr, 0xEF, size);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return ptr;
|
|
|
|
@ -324,7 +328,7 @@ void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
|
|
|
|
|
template <>
|
|
|
|
|
void Free<platform::CUDAPlace>(const platform::CUDAPlace &place, void *p,
|
|
|
|
|
size_t size) {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
GetGPUBuddyAllocator(place.device)->Free(p);
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW(platform::errors::PermissionDenied(
|
|
|
|
@ -334,7 +338,7 @@ void Free<platform::CUDAPlace>(const platform::CUDAPlace &place, void *p,
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
uint64_t Release<platform::CUDAPlace>(const platform::CUDAPlace &place) {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
return GetGPUBuddyAllocator(place.device)->Release();
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW(platform::errors::PermissionDenied(
|
|
|
|
@ -342,7 +346,7 @@ uint64_t Release<platform::CUDAPlace>(const platform::CUDAPlace &place) {
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
BuddyAllocator *GetCUDAPinnedBuddyAllocator() {
|
|
|
|
|
static std::once_flag init_flag;
|
|
|
|
|
static BuddyAllocator *ba = nullptr;
|
|
|
|
@ -360,7 +364,7 @@ BuddyAllocator *GetCUDAPinnedBuddyAllocator() {
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
size_t Used<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place) {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
return GetCUDAPinnedBuddyAllocator()->Used();
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW(platform::errors::PermissionDenied(
|
|
|
|
@ -371,7 +375,7 @@ size_t Used<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place) {
|
|
|
|
|
template <>
|
|
|
|
|
void *Alloc<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
|
|
|
|
|
size_t size) {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
auto *buddy_allocator = GetCUDAPinnedBuddyAllocator();
|
|
|
|
|
void *ptr = buddy_allocator->Alloc(size);
|
|
|
|
|
|
|
|
|
@ -392,7 +396,7 @@ void *Alloc<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
|
|
|
|
|
template <>
|
|
|
|
|
void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
|
|
|
|
|
void *p, size_t size) {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
GetCUDAPinnedBuddyAllocator()->Free(p);
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW(platform::errors::PermissionDenied(
|
|
|
|
@ -403,7 +407,7 @@ void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
|
|
|
|
|
template <>
|
|
|
|
|
uint64_t Release<platform::CUDAPinnedPlace>(
|
|
|
|
|
const platform::CUDAPinnedPlace &place) {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
return GetCUDAPinnedBuddyAllocator()->Release();
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW(platform::errors::PermissionDenied(
|
|
|
|
@ -449,7 +453,7 @@ size_t Usage::operator()(const platform::CPUPlace &cpu) const {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t Usage::operator()(const platform::CUDAPlace &gpu) const {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
return Used(gpu);
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW(platform::errors::PermissionDenied(
|
|
|
|
@ -458,7 +462,7 @@ size_t Usage::operator()(const platform::CUDAPlace &gpu) const {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const {
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|
|
|
|
|
return Used(cuda_pinned);
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW(platform::errors::PermissionDenied(
|
|
|
|
|