|
|
|
@ -22,6 +22,8 @@ limitations under the License. */
|
|
|
|
|
#include "paddle/fluid/platform/gpu_info.h"
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
#include <cuda_runtime.h>
|
|
|
|
|
|
|
|
|
|
DECLARE_double(fraction_of_gpu_memory_to_use);
|
|
|
|
|
DECLARE_uint64(initial_gpu_memory_in_mb);
|
|
|
|
|
DECLARE_uint64(reallocate_gpu_memory_in_mb);
|
|
|
|
@ -31,9 +33,11 @@ namespace paddle {
|
|
|
|
|
namespace memory {
|
|
|
|
|
namespace detail {
|
|
|
|
|
|
|
|
|
|
constexpr static int test_gpu_id = 0;
|
|
|
|
|
constexpr static int TEST_GPU_ID = 0;
|
|
|
|
|
|
|
|
|
|
void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
|
|
|
|
|
int* TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes,
|
|
|
|
|
bool use_system_allocator = false,
|
|
|
|
|
bool free_ptr = true) {
|
|
|
|
|
bool freed = false;
|
|
|
|
|
size_t used_bytes = allocator->Used();
|
|
|
|
|
|
|
|
|
@ -41,19 +45,25 @@ void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
|
|
|
|
|
void* p = allocator->Alloc(size_bytes);
|
|
|
|
|
|
|
|
|
|
EXPECT_NE(p, nullptr);
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
if (size_bytes < platform::GpuMaxChunkSize()) {
|
|
|
|
|
if (size_bytes < allocator->GetMaxChunkSize()) {
|
|
|
|
|
#else
|
|
|
|
|
if (size_bytes < platform::CpuMaxChunkSize()) {
|
|
|
|
|
if (size_bytes < allocator->GetMaxChunkSize()) {
|
|
|
|
|
#endif
|
|
|
|
|
// Not allocate from SystemAllocator
|
|
|
|
|
EXPECT_FALSE(use_system_allocator);
|
|
|
|
|
EXPECT_GE(allocator->Used(), used_bytes + size_bytes);
|
|
|
|
|
} else {
|
|
|
|
|
// Allocate from SystemAllocator doesn't count in Used()
|
|
|
|
|
EXPECT_TRUE(use_system_allocator);
|
|
|
|
|
EXPECT_EQ(allocator->Used(), used_bytes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int* intp = static_cast<int*>(p);
|
|
|
|
|
if (!free_ptr) {
|
|
|
|
|
return intp;
|
|
|
|
|
}
|
|
|
|
|
std::shared_ptr<int> ptr(intp, [&](void* p) {
|
|
|
|
|
allocator->Free(intp);
|
|
|
|
|
freed = true;
|
|
|
|
@ -64,20 +74,30 @@ void TestBuddyAllocator(BuddyAllocator* allocator, size_t size_bytes) {
|
|
|
|
|
|
|
|
|
|
EXPECT_EQ(used_bytes, allocator->Used());
|
|
|
|
|
EXPECT_TRUE(freed);
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
TEST(BuddyAllocator, GpuFraction) {
|
|
|
|
|
// In a 16 GB machine, the pool size will be about 160 MB
|
|
|
|
|
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
|
|
|
|
|
FLAGS_initial_gpu_memory_in_mb = 0;
|
|
|
|
|
FLAGS_reallocate_gpu_memory_in_mb = 0;
|
|
|
|
|
|
|
|
|
|
BuddyAllocator buddy_allocator(
|
|
|
|
|
std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
|
|
|
|
|
std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
|
|
|
|
|
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
|
|
|
|
|
|
|
|
|
|
// Less than pool size
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 10);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 10 << 10);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 10 << 20);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
|
|
|
|
|
|
|
|
|
|
// Greater than max chunk size
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 499 << 20,
|
|
|
|
|
/* use_system_allocator = */ true);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
|
|
|
|
|
/* use_system_allocator = */ true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(BuddyAllocator, InitRealloc) {
|
|
|
|
@ -87,19 +107,19 @@ TEST(BuddyAllocator, InitRealloc) {
|
|
|
|
|
EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(100 << 20));
|
|
|
|
|
|
|
|
|
|
BuddyAllocator buddy_allocator(
|
|
|
|
|
std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
|
|
|
|
|
std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
|
|
|
|
|
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
|
|
|
|
|
|
|
|
|
|
// Less then initial size and reallocate size
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 10 << 20);
|
|
|
|
|
// Between initial size and reallocate size and not exceed pool
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 80 << 20);
|
|
|
|
|
// Less then reallocate size and exceed pool
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 40 << 20);
|
|
|
|
|
// Greater then reallocate size and exceed pool
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 80 << 20);
|
|
|
|
|
// Greater then initial size and reallocate size
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 99 << 20);
|
|
|
|
|
// Greater than max chunk size
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 101 << 20,
|
|
|
|
|
/* use_system_allocator = */ true);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
|
|
|
|
|
/* use_system_allocator = */ true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
|
|
|
|
@ -109,23 +129,87 @@ TEST(BuddyAllocator, ReallocSizeGreaterThanInit) {
|
|
|
|
|
EXPECT_EQ(platform::GpuMaxChunkSize(), static_cast<size_t>(10 << 20));
|
|
|
|
|
|
|
|
|
|
BuddyAllocator buddy_allocator(
|
|
|
|
|
std::unique_ptr<SystemAllocator>(new GPUAllocator(test_gpu_id)),
|
|
|
|
|
std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
|
|
|
|
|
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
|
|
|
|
|
|
|
|
|
|
// Less then initial size and reallocate size
|
|
|
|
|
// Less than initial size and reallocate size
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 1 << 20);
|
|
|
|
|
// Between initial size and reallocate size and not exceed pool
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 3 << 20);
|
|
|
|
|
// Less then initial size and exceed pool
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 3 << 20);
|
|
|
|
|
// Less then reallocate size and not exceed pool (now pool is 15 MB, used 7
|
|
|
|
|
// MB)
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 7 << 20);
|
|
|
|
|
// Less then reallocate size and exceed pool
|
|
|
|
|
// Between initial size and reallocate size and exceed pool
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 6 << 20);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 8 << 20);
|
|
|
|
|
// Greater then initial size and reallocate size
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30));
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 9 << 20);
|
|
|
|
|
// Greater than max trunk size
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 11 << 20,
|
|
|
|
|
/* use_system_allocator = */ true);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 2 * static_cast<size_t>(1 << 30),
|
|
|
|
|
/* use_system_allocator = */ true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(BuddyAllocator, FractionRefillPool) {
|
|
|
|
|
FLAGS_fraction_of_gpu_memory_to_use = 0.6;
|
|
|
|
|
FLAGS_initial_gpu_memory_in_mb = 0;
|
|
|
|
|
FLAGS_reallocate_gpu_memory_in_mb = 0;
|
|
|
|
|
|
|
|
|
|
size_t max_chunk_size = platform::GpuMaxChunkSize();
|
|
|
|
|
BuddyAllocator buddy_allocator(
|
|
|
|
|
std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
|
|
|
|
|
platform::GpuMinChunkSize(), max_chunk_size);
|
|
|
|
|
|
|
|
|
|
// Less than pool size
|
|
|
|
|
int* p0 = TestBuddyAllocator(&buddy_allocator, max_chunk_size - 1000,
|
|
|
|
|
/* use_system_allocator = */ false,
|
|
|
|
|
/* free_ptr = */ false);
|
|
|
|
|
// Max chunk size should be same during allocation
|
|
|
|
|
EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
|
|
|
|
|
|
|
|
|
|
size_t alloc =
|
|
|
|
|
platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use;
|
|
|
|
|
// Exceed pool trigger refilling size of fraction of avaiable gpu, and should
|
|
|
|
|
// be able to alloc 60% of the remaining GPU
|
|
|
|
|
int* p1 = TestBuddyAllocator(&buddy_allocator, alloc,
|
|
|
|
|
/* use_system_allocator = */ false,
|
|
|
|
|
/* free_ptr = */ false);
|
|
|
|
|
// Max chunk size should be same during allocation
|
|
|
|
|
EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
|
|
|
|
|
|
|
|
|
|
alloc =
|
|
|
|
|
platform::GpuAvailableMemToAlloc() * FLAGS_fraction_of_gpu_memory_to_use;
|
|
|
|
|
// Exceed pool trigger refilling size of fraction of avaiable gpu, and should
|
|
|
|
|
// be able to alloc 60% of the remaining GPU
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, alloc,
|
|
|
|
|
/* use_system_allocator = */ false);
|
|
|
|
|
// Max chunk size should be same during allocation
|
|
|
|
|
EXPECT_EQ(max_chunk_size, buddy_allocator.GetMaxChunkSize());
|
|
|
|
|
|
|
|
|
|
buddy_allocator.Free(p0);
|
|
|
|
|
buddy_allocator.Free(p1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TEST(BuddyAllocator, AllocFromAvailable) {
|
|
|
|
|
FLAGS_fraction_of_gpu_memory_to_use = 0.7;
|
|
|
|
|
FLAGS_initial_gpu_memory_in_mb = 0;
|
|
|
|
|
FLAGS_reallocate_gpu_memory_in_mb = 0;
|
|
|
|
|
|
|
|
|
|
size_t total = 0, available = 0;
|
|
|
|
|
platform::SetDeviceId(TEST_GPU_ID);
|
|
|
|
|
platform::GpuMemoryUsage(&available, &total);
|
|
|
|
|
|
|
|
|
|
// Take half of available GPU
|
|
|
|
|
void* p;
|
|
|
|
|
cudaError_t result = cudaMalloc(&p, available >> 1);
|
|
|
|
|
EXPECT_TRUE(result == cudaSuccess);
|
|
|
|
|
|
|
|
|
|
// BuddyAllocator should be able to alloc the remaining GPU
|
|
|
|
|
BuddyAllocator buddy_allocator(
|
|
|
|
|
std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
|
|
|
|
|
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
|
|
|
|
|
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 10);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 10 << 10);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, 10 << 20);
|
|
|
|
|
TestBuddyAllocator(&buddy_allocator, static_cast<size_t>(1 << 30));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
} // namespace detail
|
|
|
|
|