commit f1a392a5fe
@@ -1,15 +1,12 @@
 add_subdirectory(detail)
+add_subdirectory(allocation)
-cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce)
+cc_library(malloc SRCS malloc.cc DEPS place enforce allocator_facade)
 cc_library(memcpy SRCS memcpy.cc DEPS place)

 cc_library(memory
     DEPS
     malloc
     memcpy)

-cc_test(malloc_test SRCS malloc_test.cc DEPS malloc)
-
 #if (WITH_GPU)
 #  nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
 #endif()
@@ -0,0 +1,64 @@
cc_library(allocator SRCS allocator.cc DEPS place)
cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator)
cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)

if (WITH_GPU)
  nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard)
endif()

cc_library(retry_allocator SRCS retry_allocator.cc DEPS allocator)

if (WITH_GPU)
  nv_test(best_fit_allocator_test
          SRCS best_fit_allocator_test.cc
               best_fit_allocator_test.cu
          DEPS best_fit_allocator
               locked_allocator
               cpu_allocator
               cuda_allocator
               device_context
               memcpy)
else()
  cc_test(best_fit_allocator_test
          SRCS best_fit_allocator_test.cc
          DEPS best_fit_allocator
               locked_allocator
               cpu_allocator)
endif()

nv_library(pinned_allocator SRCS pinned_allocator.cc DEPS allocator)
if (WITH_GPU)
  set(AllocatorFacadeDeps gpu_info cuda_allocator pinned_allocator cuda_device_guard)
else ()
  set(AllocatorFacadeDeps)
endif()

cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags)
cc_library(allocator_facade SRCS allocator_facade.cc DEPS
  ${AllocatorFacadeDeps}
  cpu_allocator
  locked_allocator
  best_fit_allocator
  aligned_allocator
  auto_increment_allocator
  zero_size_allocator
  conditional_allocator
  retry_allocator
  buffered_allocator
  allocator_strategy
  legacy_allocator
  )

nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)

cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator best_fit_allocator locked_allocator cpu_allocator)

cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
@@ -0,0 +1,31 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/aligned_allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

ThinAlignedAllocator::ThinAlignedAllocator(
    std::shared_ptr<Allocator> underlyning_allocator)
    : underlying_allocator_(std::move(underlyning_allocator)) {}

bool ThinAlignedAllocator::IsAllocThreadSafe() const {
  return underlying_allocator_->IsAllocThreadSafe();
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
@@ -0,0 +1,100 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

// The aligned allocation and allocator wrap a managed allocator and
// return an aligned pointer.
//
// NOTE(yy): For speed reasons, the alignment is taken as a template
// parameter; it could be a private member if necessary.
//
// NOTE(yy): kAlignment must be 2^N; this is enforced by the `static_assert`.
template <size_t kAlignment>
class AlignedAllocation : public Allocation {
  static_assert(kAlignment > 0 && (kAlignment & (kAlignment - 1)) == 0,
                "kAlignment must be 2^N");

 public:
  AlignedAllocation(AllocationPtr&& underlying_allocation, size_t size)
      : Allocation(AlignedPtr(underlying_allocation->ptr()),
                   size + kAlignment - Offset(underlying_allocation->ptr()),
                   underlying_allocation->place()),
        underlying_allocation_(std::move(underlying_allocation)) {}

 private:
  static void* AlignedPtr(void* ptr) {
    return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(ptr) +
                                   Offset(ptr));
  }

  // Offset to the aligned pointer.
  // If ptr is already aligned, returns 0.
  static size_t Offset(void* ptr) {
    auto ptr_addr = reinterpret_cast<intptr_t>(ptr);
    intptr_t aligned_addr = (ptr_addr & ~(kAlignment - 1));
    intptr_t diff = aligned_addr - ptr_addr;
    if (diff == 0) {
      return 0;
    } else {
      return kAlignment + diff;
    }
  }

  AllocationPtr underlying_allocation_;
};

// The thin aligned allocator is trivial and is used to keep the binary small.
//
// NOTE(yy): This is a trick for the template class. The common code is
// extracted into this `thin` class, so multiple specializations of the
// template class do not grow the binary size too much.
//
// NOTE(yy): This could be over-design. If it harms code readability, it
// could be removed later.
class ThinAlignedAllocator : public Allocator {
 public:
  explicit ThinAlignedAllocator(
      std::shared_ptr<Allocator> underlyning_allocator);

  bool IsAllocThreadSafe() const;

 protected:
  std::shared_ptr<Allocator> underlying_allocator_;
};

// An aligned allocator allocates `size + kAlignment` bytes and adjusts
// the pointer offset.
template <size_t kAlignment>
class AlignedAllocator : public ThinAlignedAllocator {
 public:
  using ThinAlignedAllocator::ThinAlignedAllocator;

 protected:
  Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override {
    auto raw_allocation =
        underlying_allocator_->Allocate(size + kAlignment, attr);
    return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
  }
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
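To make the Offset() arithmetic in AlignedAllocation concrete, the following standalone sketch (not part of this commit; the addresses and the 64-byte alignment are made-up example values) reproduces the round-up computation:

#include <cstdint>
#include <cstdio>

// Standalone sketch of AlignedAllocation<kAlignment>::Offset() for
// kAlignment = 64. The offset is the distance from the raw pointer up to
// the next kAlignment boundary; an already-aligned pointer gets offset 0.
int main() {
  const std::uintptr_t kAlignment = 64;
  const std::uintptr_t addrs[] = {0x1000, 0x1003, 0x103F};
  for (std::uintptr_t ptr_addr : addrs) {
    std::uintptr_t aligned_down = ptr_addr & ~(kAlignment - 1);
    std::uintptr_t offset =
        (aligned_down == ptr_addr) ? 0
                                   : kAlignment - (ptr_addr - aligned_down);
    // e.g. raw 0x1003 -> offset 61, aligned pointer 0x1040
    std::printf("raw=0x%llx offset=%llu aligned=0x%llx\n",
                static_cast<unsigned long long>(ptr_addr),
                static_cast<unsigned long long>(offset),
                static_cast<unsigned long long>(ptr_addr + offset));
  }
  return 0;
}

With a raw allocation of `size + kAlignment` bytes (as requested by AllocateImpl above), the usable size after the aligned pointer is `size + kAlignment - offset`, which is exactly what the AlignedAllocation constructor records.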
@@ -0,0 +1,48 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "gtest/gtest.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#include "unsupported/Eigen/CXX11/Tensor"

// NOTE(yy): this unit test is not important. It is just used for debugging.
// It can be removed later.
struct FillZero {
 public:
  float* ptr_;

  __device__ void operator()(size_t i) { ptr_[i] = 0.0f; }
};

namespace paddle {
TEST(Eigen, main) {
  framework::Tensor tensor;
  platform::CUDAPlace gpu(0);
  float* ptr = tensor.mutable_data<float>({10, 10}, gpu);
  auto& dev_ctx = *reinterpret_cast<platform::CUDADeviceContext*>(
      platform::DeviceContextPool::Instance().Get(gpu));
  PADDLE_ENFORCE(cudaMemset(ptr, 0, sizeof(float) * 100));

  platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx, 100);
  for_range(FillZero{ptr});
  dev_ctx.Wait();

  auto eigen_vec = framework::EigenVector<float>::Flatten(tensor);
  auto& eigen_dev = *dev_ctx.eigen_device();
  eigen_vec.device(eigen_dev) = eigen_vec.constant(0.0f);
}
}  // namespace paddle
@@ -0,0 +1,33 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

class AllocationWithUnderlying : public Allocation {
 public:
  explicit AllocationWithUnderlying(AllocationPtr allocation)
      : Allocation(allocation->ptr(), allocation->size(), allocation->place()),
        allocation_(std::move(allocation)) {}
  AllocationPtr allocation_;
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
@@ -0,0 +1,45 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/allocator.h"

#include <functional>

namespace paddle {
namespace memory {
namespace allocation {
Allocation::~Allocation() {}

Allocator::~Allocator() {}

bool Allocator::IsAllocThreadSafe() const { return false; }

AllocationPtr Allocator::Allocate(size_t size, Allocator::Attr attr) {
  auto ptr = AllocateImpl(size, attr);
  ptr->set_allocator(this);
  return AllocationPtr(ptr);
}

void Allocator::Free(Allocation* allocation) { delete allocation; }

const char* BadAlloc::what() const noexcept { return msg_.c_str(); }

void AllocationDeleter::operator()(Allocation* allocation) const {
  auto* allocator = allocation->allocator();
  allocator->Free(allocation);
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
@@ -0,0 +1,145 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include <string>
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace memory {
namespace allocation {

// Exception thrown when `Alloc`/`AllocShared` fails
class BadAlloc : public std::exception {
 public:
  explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {}
  const char* what() const noexcept override;

 private:
  std::string msg_;
};

class Allocation;
class AllocationDeleter {
 public:
  void operator()(Allocation* allocation) const;
};

class Allocator;
// Allocation is the object holding the actual pointer. Calling
// `Allocation::ptr()` returns the allocated pointer.
//
// NOTE: this is the base class of Allocation. Each allocator can use its own
//       allocation object.
// NOTE: `Allocation::ptr()` could be nullptr if the allocation size is 0.
class Allocation {
 public:
  Allocation(void* ptr, size_t size, platform::Place place)
      : allocator_(nullptr), ptr_(ptr), size_(size), place_(place) {}

  Allocation(const Allocation& o) = delete;
  Allocation& operator=(const Allocation& o) = delete;

  // Returns the held pointer.
  // NOTE: For performance reasons, it is better not to make this method
  // virtual. If we want to implement `defragmentation` later, we might need
  // to make the `ptr_` field protected and add a virtual method such as
  // `defragmentation` to change `ptr_`.
  void* ptr() const { return ptr_; }

  // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
  // last valid element.
  //
  // NOTE: Some allocators might allocate more memory than requested, so the
  // size could be larger than the request. For example, the AlignedAllocator
  // always allocates `size + kAlignment` bytes. The raw pointer might not be
  // aligned, so an offset might be added to the raw pointer, and the size of
  // this allocation will be `size + kAlignment - offset`.
  size_t size() const { return size_; }

  const platform::Place& place() const { return place_; }

  Allocator* allocator() { return allocator_; }

  void set_allocator(Allocator* allocator) { allocator_ = allocator; }

  virtual ~Allocation();

 private:
  Allocator* allocator_;
  void* ptr_;
  size_t size_;
  platform::Place place_;
};

using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;

// Base interface class of memory Allocator.
// To allocate memory, an allocator needs two parameters:
//    1. size of bytes.
//    2. Attribute of memory.
// NOTE: the attribute of memory might be ignored if the allocator does not
// care about it.
class Allocator {
 public:
  enum Attr {
    kDefault = 0,  // Default attribute. Uses the fastest or most stable
                   // allocation algorithm.

    kFixedHuge = 1,  // The allocation may not be freed until the program
                     // ends. e.g., `Parameters` and `Momentum`.

    kFluxHuge = 2,  // The allocation may be created and freed frequently and
                    // is considerably large, like `activations` and
                    // gradients.

    kScratchpad =
        3,  // The `Scratchpad` memory is allocated and freed very soon,
            // usually within an operator or as aux memory.
            // Like CUDNN workspace, AUX memory in batch norm, etc.
            //
            // https://en.wikipedia.org/wiki/Scratchpad_memory

    kCrossDevice =
        4,  // The memory used for cross-device memory copy/communication.
            // For example:
            // 1. it can use a `pinned` memory for CPU-GPU
            //    communication.
            // 2. it can use a `registered` memory for RDMA
            //    communication.

    NumOfAttrs = 5  // The number of all attributes. It is used internally.
  };

  virtual ~Allocator();

  // Allocate an allocation.
  AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault);

  // True if `Allocate` is thread safe.
  virtual bool IsAllocThreadSafe() const;

 protected:
  virtual void Free(Allocation* allocation);
  virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0;

 private:
  friend class AllocationDeleter;
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
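To illustrate the contract between Allocator, Allocation and AllocationDeleter declared above, here is a minimal sketch of a custom allocator (not part of this commit; the MallocAllocator name and the use of std::malloc/std::free are illustrative assumptions):

#include <cstdlib>
#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

// Hypothetical allocator used only to illustrate the interface contract:
// AllocateImpl() creates the Allocation, Allocate() attaches the allocator
// via set_allocator(), and AllocationDeleter routes destruction back to
// the owning allocator's Free().
class MallocAllocator : public Allocator {
 protected:
  Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override {
    void* p = std::malloc(size);
    if (p == nullptr) {
      throw BadAlloc("malloc failed");
    }
    return new Allocation(p, size, platform::CPUPlace());
  }

  void Free(Allocation* allocation) override {
    std::free(allocation->ptr());
    delete allocation;
  }
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

// Usage sketch:
//   MallocAllocator a;
//   auto buf = a.Allocate(256);  // AllocationPtr, a unique_ptr with
//                                // AllocationDeleter
//   // when buf goes out of scope, AllocationDeleter calls a.Free(...)

This is the same wiring that Allocator::Allocate in allocator.cc relies on: because set_allocator(this) is called on every allocation, the unique_ptr's deleter can always find its owning allocator.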
File diff suppressed because it is too large
@@ -0,0 +1,57 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace memory {
namespace allocation {

// AllocatorFacade is the interface exposed to other modules.
// All of the configuration, and any dirty code under development, should
// be hidden behind this facade.
//
// NOTE(yy): This class is a singleton.
// NOTE(yy): To create a stable ABI and make compilation faster, we use the
// Pimpl trick here.
class AllocatorFacadePrivate;
class AllocatorFacade {
 public:
  ~AllocatorFacade();
  AllocatorFacade(const AllocatorFacade& o) = delete;
  const AllocatorFacade& operator=(const AllocatorFacade& o) = delete;

  static AllocatorFacade& Instance();

  // Allocate a shared allocation.
  std::shared_ptr<Allocation> AllocShared(
      const platform::Place& place, size_t size,
      Allocator::Attr attr = Allocator::kDefault);

  // Allocate a unique allocation.
  AllocationPtr Alloc(const platform::Place& place, size_t size,
                      Allocator::Attr attr = Allocator::kDefault);

  // TODO(yy): Allocate a Copy-On-Write allocation?
 private:
  AllocatorFacade();
  AllocatorFacadePrivate* m_;
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
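A minimal usage sketch of the facade declared above (not part of this commit; the FacadeUsageSketch function is illustrative and assumes gflags have already been parsed):

#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/place.h"

namespace alloc = paddle::memory::allocation;

void FacadeUsageSketch() {
  auto& facade = alloc::AllocatorFacade::Instance();

  // Unique ownership: freed when `buf` goes out of scope.
  alloc::AllocationPtr buf =
      facade.Alloc(paddle::platform::CPUPlace(), 1024);

  // Shared ownership: the last std::shared_ptr copy releases the memory.
  std::shared_ptr<alloc::Allocation> shared =
      facade.AllocShared(paddle::platform::CPUPlace(), 2048);
}

The allocator_facade_test.cc below exercises the same Alloc path for CPU, GPU, and CUDA-pinned places.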
@@ -0,0 +1,87 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>

#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_double(fraction_of_cuda_pinned_memory_to_use);
DECLARE_int64(gpu_allocator_retry_time);
#endif

namespace paddle {
namespace memory {
namespace allocation {

TEST(allocator, allocator) {
#ifdef PADDLE_WITH_CUDA
  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
  FLAGS_gpu_allocator_retry_time = 500;
  FLAGS_fraction_of_cuda_pinned_memory_to_use = 0.5;
#endif

  auto &instance = AllocatorFacade::Instance();
  platform::Place place;
  size_t size = 1024;

  {
    place = platform::CPUPlace();
    size = 1024;
    auto cpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(cpu_allocation, nullptr);
    ASSERT_NE(cpu_allocation->ptr(), nullptr);
    ASSERT_EQ(cpu_allocation->place(), place);
    ASSERT_EQ(cpu_allocation->size(), size);
  }

#ifdef PADDLE_WITH_CUDA
  {
    place = platform::CUDAPlace(0);
    size = 1024;
    auto gpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(gpu_allocation, nullptr);
    ASSERT_NE(gpu_allocation->ptr(), nullptr);
    ASSERT_EQ(gpu_allocation->place(), place);
    ASSERT_GE(gpu_allocation->size(), size);
  }

  {
    // Allocate 2GB gpu memory
    place = platform::CUDAPlace(0);
    size = 2 * static_cast<size_t>(1 << 30);
    auto gpu_allocation = instance.Alloc(place, size);
    ASSERT_NE(gpu_allocation, nullptr);
    ASSERT_NE(gpu_allocation->ptr(), nullptr);
    ASSERT_EQ(gpu_allocation->place(), place);
    ASSERT_GE(gpu_allocation->size(), size);
  }

  {
    place = platform::CUDAPinnedPlace();
    size = (1 << 20);
    auto cuda_pinned_allocation =
        instance.Alloc(platform::CUDAPinnedPlace(), 1 << 20);
    ASSERT_NE(cuda_pinned_allocation, nullptr);
    ASSERT_NE(cuda_pinned_allocation->ptr(), nullptr);
    ASSERT_EQ(cuda_pinned_allocation->place(), place);
    ASSERT_GE(cuda_pinned_allocation->size(), size);
  }
#endif
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
@@ -0,0 +1,41 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "gflags/gflags.h"

DEFINE_string(
    allocator_strategy, "legacy",
    "The allocation strategy. Legacy means the original allocator of Fluid. "
    "New means the experimental allocators of Fluid. in [legacy, new]");

namespace paddle {
namespace memory {
namespace allocation {

static AllocatorStrategy GetStrategyFromFlag() {
  return FLAGS_allocator_strategy == "legacy"
             ? AllocatorStrategy::kLegacy
             : AllocatorStrategy::kNaiveBestFit;
}

AllocatorStrategy GetAllocatorStrategy() {
  static AllocatorStrategy strategy = GetStrategyFromFlag();
  return strategy;
}

void UseAllocatorStrategyGFlag() {}
}  // namespace allocation
}  // namespace memory
}  // namespace paddle
@@ -0,0 +1,30 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

namespace paddle {
namespace memory {
namespace allocation {

enum class AllocatorStrategy { kLegacy, kNaiveBestFit };

extern AllocatorStrategy GetAllocatorStrategy();

// Does nothing; it just makes sure the linker does not prune this file.
extern void UseAllocatorStrategyGFlag();

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
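A short sketch of how the strategy above might be consumed (not part of this commit; the UseLegacyAllocator helper is illustrative):

#include "paddle/fluid/memory/allocation/allocator_strategy.h"

namespace alloc = paddle::memory::allocation;

// Illustrative dispatch on the configured strategy. The gflag is expected to
// be set before the first call, e.g. --allocator_strategy=legacy (default)
// or --allocator_strategy=new, since GetAllocatorStrategy() caches the value
// in a function-local static on first use.
bool UseLegacyAllocator() {
  alloc::UseAllocatorStrategyGFlag();  // keep the flag's object file linked in
  return alloc::GetAllocatorStrategy() == alloc::AllocatorStrategy::kLegacy;
}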
@@ -0,0 +1,78 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"

namespace paddle {
namespace memory {
namespace allocation {
bool AutoIncrementAllocator::IsAllocThreadSafe() const { return true; }

std::shared_ptr<Allocator> AutoIncrementAllocator::CreateNewAllocator() {
  std::lock_guard<std::mutex> guard(mtx_);
  auto old_size = allocator_num_.load();
  PADDLE_ENFORCE_LT(old_size, underlying_allocators_.size(),
                    "Allocator number exceeds capacity %d",
                    underlying_allocators_.size());
  underlying_allocators_[old_size] = creator_();
  prev_success_allocator_ = old_size;
  ++allocator_num_;
  PADDLE_ENFORCE(
      underlying_allocators_[old_size]->IsAllocThreadSafe(),
      "the underlying allocator must be thread safe. This is a program "
      "bug.");
  return underlying_allocators_[old_size];
}
Allocation *AutoIncrementAllocator::AllocateImpl(size_t size,
                                                 Allocator::Attr attr) {
  auto cur = prev_success_allocator_.load();
  size_t retry_count = allocator_num_.load();
  size_t allocator_num = retry_count;
  while (retry_count-- > 0) {  // until the retry count reaches zero
    try {
      auto res = underlying_allocators_[cur]->Allocate(size, attr);
      prev_success_allocator_ = cur;
      return res.release();
    } catch (BadAlloc &) {
      if (++cur >= allocator_num) {
        cur = 0;
      }
    } catch (...) {
      // if there is another type of exception, just rethrow it.
      throw;
    }
  }

  // This happens when the first allocator is exhausted and
  // there is more than one allocation request.
  // In this situation, the first allocation request would succeed
  // and the second allocation request would fail if we did not use
  // the allocator newly created by the first allocation request.
  for (cur = allocator_num; cur < allocator_num_; ++cur) {
    try {
      auto ret = underlying_allocators_[cur]->Allocate(size, attr);
      prev_success_allocator_ = cur;
      return ret.release();
    } catch (BadAlloc &) {
    } catch (...) {
      throw;
    }
  }
  // No suitable allocator
  return CreateNewAllocator()->Allocate(size, attr).release();
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
@@ -0,0 +1,79 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>  // NOLINT
#include <functional>
#include <memory>
#include <mutex>  // NOLINT
#include <thread>  // NOLINT
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

// The AutoIncrementAllocator manages many underlying allocators. If none of
// them can allocate the requested memory, a new allocator is created and its
// `allocate` method is invoked.
//
// NOTE(yy): The AutoIncrementAllocator prefers to allocate memory from the
// latest successful allocator.
//
// NOTE(yy): We may need to release an underlying allocator if it allocates
// nothing. However, that is generally not useful, since it would make
// performance undetermined.
//
// NOTE(yy): This allocator is only locked when creating a new underlying
// allocator. The allocation requests from many threads may be dispatched
// to the same underlying allocator. So the underlying allocator must be
// thread safe.
//
// NOTE(zjl): Add a capacity parameter to the constructor. A high-performance
// thread-safe std::vector with varying size is hard to implement.
// Fortunately, we can get the total GPU memory and each chunk size.
// Therefore, we can get the suitable capacity of AutoIncrementAllocator.
class AutoIncrementAllocator : public Allocator {
 public:
  // Creator is the method to create a new underlying allocator
  using AllocatorCreator = std::function<std::shared_ptr<Allocator>()>;

  explicit AutoIncrementAllocator(AllocatorCreator&& creator, size_t capacity)
      : creator_(std::move(creator)), underlying_allocators_(capacity) {}

  bool IsAllocThreadSafe() const override;

 private:
  std::shared_ptr<Allocator> CreateNewAllocator();

 protected:
  Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override;

 private:
  AllocatorCreator creator_;

  std::vector<AllocatorCreator::result_type> underlying_allocators_;
  std::atomic<size_t> allocator_num_{0};

  // Use std::atomic rather than std::mutex, since std::atomic is usually
  // lock-free
  std::atomic<size_t> prev_success_allocator_{0};

  std::mutex mtx_;
};
}  // namespace allocation
}  // namespace memory
}  // namespace paddle
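A sketch of how an AutoIncrementAllocator might be constructed (not part of this commit; CreateThreadSafeChunkAllocator is a hypothetical factory standing in for however the facade actually wires up its per-chunk allocators):

#include <cstddef>
#include <memory>
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"

namespace alloc = paddle::memory::allocation;

// Hypothetical helper: builds one thread-safe allocator that manages a fixed
// chunk of memory. Any Allocator whose IsAllocThreadSafe() returns true and
// which throws BadAlloc when the chunk is exhausted would do.
std::shared_ptr<alloc::Allocator> CreateThreadSafeChunkAllocator();

std::unique_ptr<alloc::AutoIncrementAllocator> MakeGrowingAllocator() {
  // Capacity bounds how many underlying chunk allocators may ever be created;
  // a new one is created only after all existing ones throw BadAlloc.
  constexpr std::size_t kCapacity = 16;
  return std::unique_ptr<alloc::AutoIncrementAllocator>(
      new alloc::AutoIncrementAllocator(
          [] { return CreateThreadSafeChunkAllocator(); }, kCapacity));
}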
@@ -0,0 +1,168 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include <cmath>
#include <list>
#include <map>
#include <string>

namespace paddle {
namespace memory {
namespace allocation {

static int HighestBitPos(size_t N) {
  if (UNLIKELY(N == 0)) {
    return 0;
  } else {
#ifdef __GNUCC__
    return sizeof(unsigned int) * 8 - __builtin_clz(N);
#else
    return static_cast<int>(std::log2(N) + 1);
#endif
  }
}

BestFitAllocator::BestFitAllocator(Allocation* allocation)
    : allocation_(allocation) {
  details::Chunk chunk;
  chunk.size_ = allocation_->size();
  chunk.offset_ = 0;
  chunk.is_free = true;
  chunks_.emplace_back(chunk);
  free_chunks_[HighestBitPos(chunk.size_)].insert(
      {chunk.size_, chunks_.begin()});
}

size_t BestFitAllocator::FreeSize() const {
  size_t acc = 0;
  for (auto& array_item : free_chunks_) {
    for (auto& pair : array_item) {
      acc += pair.second->size_;
    }
  }
  return acc;
}

BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
                                                      size_t free_chunk_offset,
                                                      MapIt bin_iterator) {
  auto to_split_it = bin_iterator->second;
  free_chunks_[free_chunk_offset].erase(bin_iterator);

  PADDLE_ENFORCE(to_split_it->is_free);
  PADDLE_ENFORCE_GE(to_split_it->size_, request_size);

  auto remaining_size = to_split_it->size_ - request_size;
  details::Chunk to_use;
  details::Chunk remaining;
  to_use.size_ = request_size;
  to_use.is_free = false;
  remaining.size_ = remaining_size;
  remaining.is_free = true;

  // calc offsets
  to_use.offset_ = to_split_it->offset_;
  remaining.offset_ = to_use.offset_ + to_use.size_;

  // insert to chunk list
  auto to_use_it = chunks_.insert(to_split_it, to_use);
  if (remaining.size_ != 0) {
    auto bit_size = static_cast<size_t>(HighestBitPos(remaining.size_));
    free_chunks_[bit_size].insert(
        {remaining.size_, chunks_.insert(to_split_it, remaining)});
  }
  chunks_.erase(to_split_it);
  return to_use_it;
}

void BestFitAllocator::InsertFreeNode(const ListIt& it) {
  auto pos = static_cast<size_t>(HighestBitPos(it->size_));
  auto& free_map = free_chunks_[pos];
  free_map.insert({it->size_, it});
}
void BestFitAllocator::EraseFreeNode(const ListIt& it) {
  size_t pos = static_cast<size_t>(HighestBitPos(it->size_));
  auto& free_map = free_chunks_[pos];
  auto map_it = free_map.find(it->size_);
  while (map_it->second != it && map_it != free_map.end()) {
    ++map_it;
  }
  PADDLE_ENFORCE(map_it != free_map.end());
  free_map.erase(map_it);
}
size_t BestFitAllocator::NumFreeChunks() const {
  size_t num = 0;
  for (auto& array_item : free_chunks_) {
    num += array_item.size();
  }
  return num;
}
void BestFitAllocator::Free(Allocation* allocation) {
  auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
  auto chunk_it = bf_allocation->ChunkIterator();
  PADDLE_ENFORCE(!chunk_it->is_free);
  chunk_it->is_free = true;
  if (chunk_it != chunks_.begin()) {
    auto prev_it = chunk_it;
    --prev_it;

    if (prev_it->is_free) {
      // Merge Left.
      EraseFreeNode(prev_it);
      prev_it->size_ += chunk_it->size_;
      chunks_.erase(chunk_it);
      chunk_it = prev_it;
    }
  }

  auto next_it = chunk_it;
  ++next_it;
  if (next_it != chunks_.end() && next_it->is_free) {
    EraseFreeNode(next_it);
    chunk_it->size_ += next_it->size_;
    chunks_.erase(next_it);
  }

  InsertFreeNode(chunk_it);
  delete allocation;
}
Allocation* BestFitAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
  auto highest_set_bit = static_cast<size_t>(HighestBitPos(size));
  MapIt map_it;
  for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) {
    map_it = free_chunks_[highest_set_bit].lower_bound(size);
    if (map_it != free_chunks_[highest_set_bit].end()) {
      break;
    }
  }
  if (UNLIKELY(highest_set_bit == free_chunks_.size())) {
    throw BadAlloc(string::Sprintf(
        "Cannot allocate %d, All fragments size is %d", size, FreeSize()));
  }
  auto chunk_it = SplitChunk(size, highest_set_bit, map_it);
  return new BestFitAllocation(this, chunk_it);
}

BestFitAllocation::BestFitAllocation(
    paddle::memory::allocation::BestFitAllocator* allocator,
    typename details::ChunkList::iterator chunk_it)
    : Allocation(reinterpret_cast<void*>(
                     reinterpret_cast<uintptr_t>(allocator->BasePtr()) +
                     chunk_it->offset_),
                 chunk_it->size_, allocator->Place()),
      chunk_it_(chunk_it) {}
}  // namespace allocation
}  // namespace memory
}  // namespace paddle
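The free lists above are bucketed by the highest set bit of a chunk's size. This standalone sketch (not part of this commit; the sizes are made-up examples) shows the bucketing that HighestBitPos() computes:

#include <cstddef>
#include <cstdio>

// Standalone sketch of HighestBitPos(): a chunk of `size` bytes is stored in
// bucket floor(log2(size)) + 1, so every chunk in bucket b has a size in
// [2^(b-1), 2^b - 1]. AllocateImpl() then scans buckets upward, starting at
// the requested size's bucket, and takes the first fit via lower_bound().
static int HighestBitPosSketch(std::size_t n) {
  int pos = 0;
  while (n != 0) {
    ++pos;
    n >>= 1;
  }
  return pos;
}

int main() {
  const std::size_t sizes[] = {1, 7, 8, 1000, 4096};
  for (std::size_t s : sizes) {
    std::printf("size=%zu -> bucket %d\n", s, HighestBitPosSketch(s));
  }
  // size=1 -> 1, size=7 -> 3, size=8 -> 4, size=1000 -> 10, size=4096 -> 13
  return 0;
}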
Some files were not shown because too many files have changed in this diff.