|
|
|
@ -38,8 +38,7 @@ BuddyAllocator* GetCPUBuddyAllocator() {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size,
|
|
|
|
|
bool is_pinned) {
|
|
|
|
|
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
|
|
|
|
|
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
|
|
|
|
|
void* p = GetCPUBuddyAllocator()->Alloc(size);
|
|
|
|
|
VLOG(10) << " pointer=" << p;
|
|
|
|
@ -47,8 +46,7 @@ void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p,
|
|
|
|
|
bool is_pinned) {
|
|
|
|
|
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
|
|
|
|
|
VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
|
|
|
|
|
GetCPUBuddyAllocator()->Free(p);
|
|
|
|
|
}
|
|
|
|
@ -85,27 +83,13 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BuddyAllocator* GetCUDAPinnedBuddyAllocator(int gpu_id) {
|
|
|
|
|
static BuddyAllocator** as = NULL;
|
|
|
|
|
static BuddyAllocator* as = NULL;
|
|
|
|
|
if (as == NULL) {
|
|
|
|
|
int gpu_num = platform::GetCUDADeviceCount();
|
|
|
|
|
as = new BuddyAllocator*[gpu_num];
|
|
|
|
|
for (int gpu = 0; gpu < gpu_num; gpu++) {
|
|
|
|
|
as[gpu] = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
platform::SetDeviceId(gpu_id);
|
|
|
|
|
if (!as[gpu_id]) {
|
|
|
|
|
as[gpu_id] = new BuddyAllocator(new detail::CUDAPinnedAllocator,
|
|
|
|
|
platform::GpuMinChunkSize(),
|
|
|
|
|
platform::GpuMaxChunkSize());
|
|
|
|
|
VLOG(10) << "\n\nNOTE: each GPU device use "
|
|
|
|
|
<< FLAGS_fraction_of_gpu_memory_to_use * 100
|
|
|
|
|
<< "% of GPU memory.\n"
|
|
|
|
|
<< "You can set GFlags environment variable '"
|
|
|
|
|
<< "FLAGS_fraction_of_gpu_memory_to_use"
|
|
|
|
|
<< "' to change the fraction of GPU usage.\n\n";
|
|
|
|
|
as = new BuddyAllocator(new detail::CUDAPinnedAllocator,
|
|
|
|
|
platform::CpuMinChunkSize(),
|
|
|
|
|
platform::CpuMaxChunkSize());
|
|
|
|
|
}
|
|
|
|
|
return as[gpu_id];
|
|
|
|
|
return as;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
@ -114,16 +98,9 @@ size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size,
|
|
|
|
|
bool is_pinned) {
|
|
|
|
|
void* ptr;
|
|
|
|
|
if (is_pinned) {
|
|
|
|
|
auto* buddy_allocator = GetCUDAPinnedBuddyAllocator(place.device);
|
|
|
|
|
ptr = buddy_allocator->Alloc(size);
|
|
|
|
|
} else {
|
|
|
|
|
auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
|
|
|
|
|
ptr = buddy_allocator->Alloc(size);
|
|
|
|
|
}
|
|
|
|
|
void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
|
|
|
|
|
auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
|
|
|
|
|
void* ptr = buddy_allocator->Alloc(size);
|
|
|
|
|
|
|
|
|
|
if (ptr == nullptr) {
|
|
|
|
|
int cur_dev = platform::GetCurrentDeviceId();
|
|
|
|
@ -142,13 +119,39 @@ void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p,
|
|
|
|
|
bool is_pinned) {
|
|
|
|
|
if (is_pinned) {
|
|
|
|
|
GetCUDAPinnedBuddyAllocator(place.device)->Free(p);
|
|
|
|
|
} else {
|
|
|
|
|
GetGPUBuddyAllocator(place.device)->Free(p);
|
|
|
|
|
void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) {
|
|
|
|
|
GetGPUBuddyAllocator(place.device)->Free(p);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t Used<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place) {
|
|
|
|
|
return GetGPUBuddyAllocator(place.device)->Used();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place,
|
|
|
|
|
size_t size) {
|
|
|
|
|
auto* buddy_allocator = GetCUDAPinnedBuddyAllocator(place.device);
|
|
|
|
|
void* ptr = buddy_allocator->Alloc(size);
|
|
|
|
|
|
|
|
|
|
if (ptr == nullptr) {
|
|
|
|
|
int cur_dev = platform::GetCurrentDeviceId();
|
|
|
|
|
platform::SetDeviceId(place.device);
|
|
|
|
|
size_t avail, total;
|
|
|
|
|
platform::GpuMemoryUsage(avail, total);
|
|
|
|
|
LOG(WARNING) << "Cannot allocate " << size << " bytes in GPU "
|
|
|
|
|
<< place.device << ", available " << avail << " bytes";
|
|
|
|
|
LOG(WARNING) << "total " << total;
|
|
|
|
|
LOG(WARNING) << "GpuMinChunkSize " << platform::GpuMinChunkSize();
|
|
|
|
|
LOG(WARNING) << "GpuMaxChunkSize " << platform::GpuMaxChunkSize();
|
|
|
|
|
LOG(WARNING) << "GPU memory used: " << Used<platform::CUDAPlace>(place);
|
|
|
|
|
platform::SetDeviceId(cur_dev);
|
|
|
|
|
}
|
|
|
|
|
return ptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
void Free<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place, void* p) {
|
|
|
|
|
GetCUDAPinnedBuddyAllocator(place.device)->Free(p);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
@ -165,6 +168,10 @@ size_t Usage::operator()(const platform::CUDAPlace& gpu) const {
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t Usage::operator()(const platform::CUDAPinnedPlace& cuda_pinned) const {
|
|
|
|
|
return Used(cuda_pinned);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t memory_usage(const platform::Place& p) {
|
|
|
|
|
return boost::apply_visitor(Usage(), p);
|
|
|
|
|
}
|
|
|
|
|