|
|
@ -13,7 +13,9 @@
|
|
|
|
// limitations under the License.
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
|
|
|
|
#include "paddle/fluid/memory/allocation/allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/allocator.h"
|
|
|
|
|
|
|
|
#include <gflags/gflags.h>
|
|
|
|
#include <map>
|
|
|
|
#include <map>
|
|
|
|
|
|
|
|
#include <unordered_map>
|
|
|
|
#include <vector>
|
|
|
|
#include <vector>
|
|
|
|
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/allocator_facade.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/allocator_facade.h"
|
|
|
@ -24,6 +26,7 @@
|
|
|
|
#include "paddle/fluid/memory/allocation/locked_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/locked_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
|
|
|
|
|
|
|
|
#include "paddle/fluid/memory/allocation/retry_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
|
|
|
|
#include "paddle/fluid/platform/cuda_device_guard.h"
|
|
|
|
#include "paddle/fluid/platform/cuda_device_guard.h"
|
|
|
|
#include "paddle/fluid/platform/gpu_info.h"
|
|
|
|
#include "paddle/fluid/platform/gpu_info.h"
|
|
|
@ -32,6 +35,11 @@
|
|
|
|
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
|
|
|
|
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Defines the int32 gflags flag `gpu_allocator_retry_time` with a default of 0.
// Per its own help text, the value is a retry time in milliseconds that applies
// when an allocator fails to allocate memory; a value that is not greater than
// 0 means no retry. The flag is read elsewhere in this file through
// FLAGS_gpu_allocator_retry_time.
DEFINE_int32(
|
|
|
|
|
|
|
|
gpu_allocator_retry_time, 0,
|
|
|
|
|
|
|
|
"The retry time (milliseconds) when allocator fails "
|
|
|
|
|
|
|
|
"to allocate memory. No retry if this value is not greater than 0");
|
|
|
|
|
|
|
|
|
|
|
|
namespace paddle {
|
|
|
|
namespace paddle {
|
|
|
|
namespace memory {
|
|
|
|
namespace memory {
|
|
|
|
namespace allocation {
|
|
|
|
namespace allocation {
|
|
|
@ -60,6 +68,7 @@ class CPUManagedAllocator : public ManagedAllocator {
|
|
|
|
return normal_allocator_->AllocateShared(size, attr);
|
|
|
|
return normal_allocator_->AllocateShared(size, attr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Override that unconditionally returns true.
// NOTE(review): the hunk header places this inside CPUManagedAllocator; the
// line appears twice because this listing seems to interleave both sides of a
// diff -- confirm against the real file.
bool IsAllocThreadSafe() const override { return true; }
|
|
|
|
bool IsAllocThreadSafe() const override { return true; }
|
|
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
private:
|
|
|
@ -86,8 +95,12 @@ class CUDAManagedAllocator : public ManagedAllocator {
|
|
|
|
size_t capacity = available / max_chunk_size_;
|
|
|
|
size_t capacity = available / max_chunk_size_;
|
|
|
|
|
|
|
|
|
|
|
|
if (capacity == 1) {
|
|
|
|
if (capacity == 1) {
|
|
|
|
|
|
|
|
VLOG(10) << "Create BestFitAllocator with chunk_size "
|
|
|
|
|
|
|
|
<< max_chunk_size_;
|
|
|
|
default_allocator_ = BestFitAllocatorCreator();
|
|
|
|
default_allocator_ = BestFitAllocatorCreator();
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
|
|
|
|
VLOG(10) << "Create AutoIncrementAllocator with chunk_size "
|
|
|
|
|
|
|
|
<< max_chunk_size_ << " and capacity " << capacity;
|
|
|
|
default_allocator_ = std::make_shared<AutoIncrementAllocator>(
|
|
|
|
default_allocator_ = std::make_shared<AutoIncrementAllocator>(
|
|
|
|
[this] { return std::move(BestFitAllocatorCreator()); }, capacity);
|
|
|
|
[this] { return std::move(BestFitAllocatorCreator()); }, capacity);
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -116,6 +129,7 @@ class CUDAManagedAllocator : public ManagedAllocator {
|
|
|
|
// Forwards the request to default_allocator_->Allocate(size, attr) and returns
// the resulting std::unique_ptr<Allocation> unchanged.
//   size, attr: passed through to default_allocator_ without modification.
std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
|
|
|
|
std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
|
|
|
|
return default_allocator_->Allocate(size, attr);
|
|
|
|
return default_allocator_->Allocate(size, attr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Forwards the request to default_allocator_->AllocateShared(size, attr) and
// returns the resulting std::shared_ptr<Allocation> unchanged.
//   size, attr: passed through to default_allocator_ without modification.
std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
|
|
|
|
std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
|
|
|
|
return default_allocator_->AllocateShared(size, attr);
|
|
|
|
return default_allocator_->AllocateShared(size, attr);
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -123,10 +137,20 @@ class CUDAManagedAllocator : public ManagedAllocator {
|
|
|
|
// Creates one allocator backed by a freshly obtained chunk.
//
// Steps visible in this block:
//   1. raw_allocator_->Allocate(max_chunk_size_) is called and the result is
//      appended to chunks_; a raw pointer to it (chunks_.back().get()) is
//      handed to a new BestFitAllocator.
//   2. The BestFitAllocator is wrapped in a LockedAllocator.
//   3. The result is wrapped in a managed allocator and finally in
//      AlignedAllocator<64u>, which is returned as
//      std::shared_ptr<ManagedAllocator>.
//
// NOTE(review): this listing appears to interleave two revisions of the body.
//   - One revision returns AlignedAllocator<64u> over
//     NaiveManagedAllocator::Create(LockedAllocator(BestFitAllocator)) in a
//     single expression.
//   - The other first builds `unmanaged_allocator`
//     (LockedAllocator(BestFitAllocator)) and then branches on
//     FLAGS_gpu_allocator_retry_time, as described at the branch below.
std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
|
|
|
|
std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
|
|
|
|
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
|
|
|
|
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
|
|
|
|
auto* allocation = chunks_.back().get();
|
|
|
|
auto* allocation = chunks_.back().get();
|
|
|
|
return std::make_shared<AlignedAllocator<64u>>(
|
|
|
|
std::unique_ptr<Allocator> unmanaged_allocator(new LockedAllocator(
|
|
|
|
NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
|
|
|
|
std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
|
|
|
|
new LockedAllocator(std::unique_ptr<Allocator>(
|
|
|
|
|
|
|
|
new BestFitAllocator(allocation))))));
|
|
|
|
// Retry selection: a flag value <= 0 wraps unmanaged_allocator with
// NaiveManagedAllocator::Create; any other value wraps it with
// RetryAllocator::Create, passing the flag value cast to size_t.
if (FLAGS_gpu_allocator_retry_time <= 0) {
|
|
|
|
|
|
|
|
VLOG(10) << "Create NaiveManagedAllocator without retry";
|
|
|
|
|
|
|
|
return std::make_shared<AlignedAllocator<64u>>(
|
|
|
|
|
|
|
|
NaiveManagedAllocator::Create(std::move(unmanaged_allocator)));
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
VLOG(10) << "Create RetryAllocator with retry_time "
|
|
|
|
|
|
|
|
<< FLAGS_gpu_allocator_retry_time << "ms";
|
|
|
|
|
|
|
|
return std::make_shared<AlignedAllocator<64u>>(RetryAllocator::Create(
|
|
|
|
|
|
|
|
std::move(unmanaged_allocator),
|
|
|
|
|
|
|
|
static_cast<size_t>(FLAGS_gpu_allocator_retry_time)));
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Override that unconditionally returns true.
// NOTE(review): the hunk header places this inside CUDAManagedAllocator; the
// line appears twice because this listing seems to interleave both sides of a
// diff -- confirm against the real file.
bool IsAllocThreadSafe() const override { return true; }
|
|
|
|
bool IsAllocThreadSafe() const override { return true; }
|
|
|
@ -141,7 +165,8 @@ class CUDAManagedAllocator : public ManagedAllocator {
|
|
|
|
|
|
|
|
|
|
|
|
class AllocatorFacadePrivate {
|
|
|
|
class AllocatorFacadePrivate {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
|
|
|
|
std::unordered_map<platform::Place, std::shared_ptr<ManagedAllocator>>
|
|
|
|
|
|
|
|
allocators_;
|
|
|
|
|
|
|
|
|
|
|
|
~AllocatorFacadePrivate() = default;
|
|
|
|
~AllocatorFacadePrivate() = default;
|
|
|
|
|
|
|
|
|
|
|
@ -184,13 +209,13 @@ AllocatorFacade& AllocatorFacade::Instance() {
|
|
|
|
|
|
|
|
|
|
|
|
// Looks up the allocator stored for `place` in m_->allocators_ and returns the
// result of calling AllocateShared(size, attr) on it.
//
// NOTE(review): two lookup forms appear in this listing, allocators_[place]
// and allocators_.at(place), apparently the two sides of a diff. For the
// standard map types, operator[] inserts a value-initialized entry when the
// key is missing, whereas at() throws std::out_of_range; confirm which form
// the real file contains.
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
|
|
|
|
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
|
|
|
|
const platform::Place& place, size_t size, Allocator::Attr attr) {
|
|
|
|
const platform::Place& place, size_t size, Allocator::Attr attr) {
|
|
|
|
return m_->allocators_[place]->AllocateShared(size, attr);
|
|
|
|
return m_->allocators_.at(place)->AllocateShared(size, attr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Looks up the allocator stored for `place` in m_->allocators_ and returns the
// result of calling Allocate(size, attr) on it.
//
// NOTE(review): two lookup forms appear in this listing, allocators_[place]
// and allocators_.at(place), apparently the two sides of a diff. For the
// standard map types, operator[] inserts a value-initialized entry when the
// key is missing, whereas at() throws std::out_of_range; confirm which form
// the real file contains.
std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
|
|
|
|
std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
|
|
|
|
size_t size,
|
|
|
|
size_t size,
|
|
|
|
Allocator::Attr attr) {
|
|
|
|
Allocator::Attr attr) {
|
|
|
|
return m_->allocators_[place]->Allocate(size, attr);
|
|
|
|
return m_->allocators_.at(place)->Allocate(size, attr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace allocation
|
|
|
|
} // namespace allocation
|
|
|
|