commit
4c672ab1a2
@@ -1,15 +1,12 @@
add_subdirectory(detail)
cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce)
add_subdirectory(allocation)
cc_library(malloc SRCS malloc.cc DEPS allocator_facade)
cc_library(memcpy SRCS memcpy.cc DEPS place)

cc_library(memory
    DEPS
    malloc
    memcpy)

cc_test(malloc_test SRCS malloc_test.cc DEPS malloc)

#if (WITH_GPU)
#  nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()

@@ -0,0 +1,51 @@
cc_library(allocator SRCS allocator.cc DEPS place)
cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard)

if (WITH_GPU)
  nv_test(best_fit_allocator_test
          SRCS best_fit_allocator_test.cc
               best_fit_allocator_test.cu
          DEPS best_fit_allocator
               locked_allocator
               cpu_allocator
               cuda_allocator
               device_context
               memcpy)
else()
  cc_test(best_fit_allocator_test
          SRCS best_fit_allocator_test.cc
          DEPS best_fit_allocator
               locked_allocator
               cpu_allocator)
endif()

cc_library(naive_managed_allocator SRCS naive_managed_allocator.cc DEPS allocator)
cc_test(naive_managed_allocator_test SRCS naive_managed_allocator_test.cc DEPS naive_managed_allocator)
nv_library(pinned_allocator SRCS pinned_allocator.cc DEPS allocator)
if (WITH_GPU)
  set(AllocatorFacadeDeps gpu_info cuda_allocator pinned_allocator)
else ()
  set(AllocatorFacadeDeps)
endif()

cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
cc_library(zero_size_allocator SRCS zero_size_allocator.cc DEPS allocator)
cc_library(conditional_allocator SRCS conditional_allocator.cc DEPS allocator)
cc_library(allocator_facade SRCS allocator_facade.cc DEPS
           ${AllocatorFacadeDeps}
           cpu_allocator
           locked_allocator
           best_fit_allocator
           naive_managed_allocator
           aligned_allocator
           auto_increment_allocator
           zero_size_allocator
           conditional_allocator
           cuda_device_guard)

nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)

@@ -0,0 +1,31 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/aligned_allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

ThinAlignedAllocator::ThinAlignedAllocator(
    std::shared_ptr<ManagedAllocator> underlying_allocator)
    : underlying_allocator_(std::move(underlying_allocator)) {}

std::shared_ptr<Allocation> ThinAlignedAllocator::AllocateShared(
    size_t size, Allocator::Attr attr) {
  return std::shared_ptr<Allocation>(Allocate(size, attr).release());
}
}  // namespace allocation
}  // namespace memory
}  // namespace paddle

@@ -0,0 +1,97 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

// The aligned allocation and allocator wrap a managed allocator and return an
// aligned pointer.
//
// NOTE(yy): For speed reasons, I just use a template parameter to get the
// alignment; however, it could be a private member if necessary.
//
// NOTE(yy): kAlignment must be 2^N. A `static_assert` should be added.
template <size_t kAlignment>
class AlignedAllocation : public Allocation {
 public:
  AlignedAllocation(std::unique_ptr<Allocation>&& underlying_allocation,
                    size_t size)
      : Allocation(AlignedPtr(underlying_allocation->ptr()),
                   size + kAlignment - Offset(underlying_allocation->ptr()),
                   underlying_allocation->place()),
        underlying_allocation_(std::move(underlying_allocation)) {}

 private:
  static void* AlignedPtr(void* ptr) {
    return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(ptr) +
                                   Offset(ptr));
  }

  // Offset to the aligned pointer.
  // If ptr is already aligned, returns 0.
  static size_t Offset(void* ptr) {
    auto ptr_addr = reinterpret_cast<intptr_t>(ptr);
    intptr_t aligned_addr = (ptr_addr & ~(kAlignment - 1));
    intptr_t diff = aligned_addr - ptr_addr;
    if (diff == 0) {
      return 0;
    } else {
      return kAlignment + diff;
    }
  }

  std::unique_ptr<Allocation> underlying_allocation_;
};

// The thin aligned allocator is trivial and is used to keep the binary small.
//
// NOTE(yy): This is a trick for the template class below. It extracts the
// common code into a `thin` base class, so that multiple specializations of
// the template class do not grow the binary size too much.
//
// NOTE(yy): This could be over-design. If it harms readability of the code,
// it could be removed later.
class ThinAlignedAllocator : public ManagedAllocator {
 public:
  explicit ThinAlignedAllocator(
      std::shared_ptr<ManagedAllocator> underlying_allocator);

  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;

 protected:
  std::shared_ptr<ManagedAllocator> underlying_allocator_;
};

// An aligned allocator allocates a `size + kAlignment` buffer and adjusts the
// pointer offset.
template <size_t kAlignment>
class AlignedAllocator : public ThinAlignedAllocator {
 public:
  using ThinAlignedAllocator::ThinAlignedAllocator;
  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
    auto raw_allocation =
        underlying_allocator_->Allocate(size + kAlignment, attr);
    return std::unique_ptr<Allocation>(
        new AlignedAllocation<kAlignment>(std::move(raw_allocation), size));
  }
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

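For illustration only, a minimal standalone sketch (not part of this commit) of the offset arithmetic used by AlignedAllocation above; the raw address 0x1010 and the alignment 64 are arbitrary example values. AlignedAllocation would request size + kAlignment bytes and report size + kAlignment - offset as its usable size.

#include <cstdint>
#include <cstdio>

int main() {
  const std::intptr_t kAlignment = 64;                         // must be 2^N
  std::intptr_t ptr_addr = 0x1010;                             // example raw address
  std::intptr_t aligned_addr = ptr_addr & ~(kAlignment - 1);   // 0x1000, rounded down
  std::intptr_t diff = aligned_addr - ptr_addr;                // -0x10
  std::intptr_t offset = (diff == 0) ? 0 : kAlignment + diff;  // 64 - 16 = 48
  std::printf("offset = %ld, aligned ptr = %#lx\n", static_cast<long>(offset),
              static_cast<unsigned long>(ptr_addr + offset));  // 0x1040, a multiple of 64
  return 0;
}
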
@@ -0,0 +1,48 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "gtest/gtest.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#include "unsupported/Eigen/CXX11/Tensor"

// NOTE(yy): this unittest is not important. It is just used for debugging.
// It can be removed later.
struct FillZero {
 public:
  float* ptr_;

  __device__ void operator()(size_t i) { ptr_[i] = 0.0f; }
};

namespace paddle {
TEST(Eigen, main) {
  framework::Tensor tensor;
  platform::CUDAPlace gpu(0);
  float* ptr = tensor.mutable_data<float>({10, 10}, gpu);
  auto& dev_ctx = *reinterpret_cast<platform::CUDADeviceContext*>(
      platform::DeviceContextPool::Instance().Get(gpu));
  PADDLE_ENFORCE(cudaMemset(ptr, 0, sizeof(float) * 100));

  platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx, 100);
  for_range(FillZero{ptr});
  dev_ctx.Wait();

  auto eigen_vec = framework::EigenVector<float>::Flatten(tensor);
  auto& eigen_dev = *dev_ctx.eigen_device();
  eigen_vec.device(eigen_dev) = eigen_vec.constant(0.0f);
}
}  // namespace paddle

@@ -0,0 +1,29 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
Allocation::~Allocation() {}

Allocator::~Allocator() {}

bool Allocator::IsAllocThreadSafe() const { return false; }

const char* BadAlloc::what() const noexcept { return msg_.c_str(); }

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

@@ -0,0 +1,161 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <utility>

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include <string>
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace memory {
namespace allocation {

// Exception thrown when `Alloc`/`AllocShared` failed.
class BadAlloc : public std::exception {
 public:
  explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {}
  const char* what() const noexcept override;

 private:
  std::string msg_;
};

// Allocation is the object holding the actual pointer. Calling
// `Allocation::ptr()` returns the allocated pointer.
//
// NOTE: this is the base class of allocations. Each allocator can use its own
// allocation object.
// NOTE: `Allocation::ptr()` could be nullptr if the allocation size is 0.
class Allocation {
 public:
  Allocation(void* ptr, size_t size, platform::Place place)
      : ptr_(ptr), size_(size), place_(place) {}

  Allocation(const Allocation& o) = delete;
  Allocation& operator=(const Allocation& o) = delete;

  // Returns the held pointer.
  // NOTE: For performance considerations, it is better not to make this
  // method virtual. If we want to implement `defragmentation` later, we might
  // need to make the `ptr_` field protected and add a virtual method like
  // `defragmentation` to change `ptr_`.
  void* ptr() const { return ptr_; }

  // Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
  // last valid element.
  //
  // NOTE: Some allocators might allocate more memory than requested, so the
  // size could be larger than the requested size. For example, the
  // AlignedAllocator always allocates `size + kAlignment` bytes. The raw
  // pointer might not be aligned, so an offset is added to the raw pointer.
  // The size of such an allocation will be `size + kAlignment - offset`.
  size_t size() const { return size_; }

  const platform::Place& place() const { return place_; }

  virtual ~Allocation();

 private:
  void* ptr_;
  size_t size_;
  platform::Place place_;
};

// Base interface class of memory Allocator.
// To allocate memory, an allocator needs two parameters:
//    1. size in bytes.
//    2. attribute of the memory.
// NOTE: the attribute of the memory might be ignored if the allocator does
// not care about it.
class Allocator {
 public:
  enum Attr {
    kDefault = 0,  // Default attribute. Uses the fastest or most stable
                   // allocation algorithm.

    kFixedHuge = 1,  // The allocation may not be freed until the program
                     // ends, e.g., `Parameters` and `Momentum`.

    kFluxHuge = 2,  // The allocation may be created and freed frequently and
                    // is considerably huge, like `activations` and gradients.

    kScratchpad =
        3,  // The `Scratchpad` memory is allocated and freed very soon,
            // usually within an operator or as aux memory,
            // like CUDNN workspace, AUX memory in batch norm, etc.
            //
            // https://en.wikipedia.org/wiki/Scratchpad_memory

    kCrossDevice =
        4,  // The memory is used for cross-device memory copy/communication.
            // For example:
            // 1. it can use `pinned` memory for CPU-GPU
            //    communication.
            // 2. it can use `registered` memory for RDMA
            //    communication.

    NumOfAttrs = 5  // The number of all attributes. It is used internally.
  };

  virtual ~Allocator();

  // Allocate an allocation. Note that the returned allocation might need to
  // be freed manually if the Allocator is an `UnmanagedAllocator`.
  virtual std::unique_ptr<Allocation> Allocate(
      size_t size, Allocator::Attr attr = kDefault) = 0;

  // True if `Allocate` is thread safe.
  virtual bool IsAllocThreadSafe() const;
};

// Users need to invoke `Free` or `FreeUniquePtr` manually if the allocation
// was made by a manually managed allocator.
class UnmanagedAllocator : public Allocator {
 public:
  virtual void Free(Allocation* allocation) = 0;

  void FreeUniquePtr(std::unique_ptr<Allocation> allocation) {
    Free(allocation.get());
  }
};

// The allocation will be managed by smart pointers, i.e., users do not need
// to free the allocation manually.
class ManagedAllocator : public Allocator {
 public:
  virtual std::shared_ptr<Allocation> AllocateShared(
      size_t size, Allocator::Attr attr = kDefault) = 0;
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

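A hypothetical caller-side sketch (not part of this commit) of the two ownership styles declared above; CPUAllocator and NaiveManagedAllocator come from sibling files of this change that are not shown here, and the header paths are taken from allocator_facade.cc below.

#include <memory>
#include <utility>
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"

void OwnershipDemo() {
  using namespace paddle::memory::allocation;

  // UnmanagedAllocator: the caller frees the allocation explicitly.
  CPUAllocator raw;
  std::unique_ptr<Allocation> a = raw.Allocate(1024);
  raw.FreeUniquePtr(std::move(a));

  // ManagedAllocator: the allocation is tied to smart-pointer lifetime.
  std::shared_ptr<ManagedAllocator> managed = NaiveManagedAllocator::Create(
      std::unique_ptr<Allocator>(new CPUAllocator()));
  std::shared_ptr<Allocation> b = managed->AllocateShared(1024);
  // `b` is released when the last shared_ptr goes away.
}
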
@@ -0,0 +1,182 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/allocator.h"
#include <map>
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
#endif

namespace paddle {
namespace memory {
namespace allocation {

// TODO(yy): Dirty code here. This class should be configurable in runtime.
class CPUManagedAllocator : public ManagedAllocator {
 public:
  CPUManagedAllocator()
      : normal_allocator_(NaiveManagedAllocator::Create(
            std::unique_ptr<Allocator>(new CPUAllocator()))),
        communication_allocator_(NaiveManagedAllocator::Create(
            std::unique_ptr<Allocator>(new CPUPinnedAllocator()))) {}

  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
    if (attr == kCrossDevice) {
      return communication_allocator_->Allocate(size, attr);
    } else {
      return normal_allocator_->Allocate(size, attr);
    }
  }

  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
    if (attr == kCrossDevice) {
      return communication_allocator_->AllocateShared(size, attr);
    } else {
      return normal_allocator_->AllocateShared(size, attr);
    }
  }
  bool IsAllocThreadSafe() const override { return true; }

 private:
  std::shared_ptr<ManagedAllocator> normal_allocator_;
  std::shared_ptr<ManagedAllocator> communication_allocator_;
};

#ifdef PADDLE_WITH_CUDA
// TODO(yy): Dirty code here. This class should be configurable in runtime.
class CUDAManagedAllocator : public ManagedAllocator {
 public:
  explicit CUDAManagedAllocator(int dev_id) {
    platform::CUDADeviceGuard guard(dev_id);
    max_chunk_size_ = platform::GpuMaxChunkSize();
    raw_allocator_ = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
        new CUDAAllocator(platform::CUDAPlace(dev_id))));
    default_allocator_ = std::make_shared<AutoIncrementAllocator>(
        [this] { return std::move(BestFitAllocatorCreator()); });

    auto* cond_allocator = new ConditionalAllocator();
    cond_allocator
        ->AddAllocator(
            [this](size_t size, Attr attr) { return size < max_chunk_size_; },
            default_allocator_)
        .AddAllocator(
            [](size_t size, Attr attr) {
              return true;  // default case
            },
            raw_allocator_);
    default_allocator_.reset(cond_allocator);
  }

  ~CUDAManagedAllocator() {
    // Specify destruct order.
    default_allocator_.reset();
    chunks_.clear();
    raw_allocator_.reset();
  }

  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
    return default_allocator_->Allocate(size, attr);
  }
  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
    return default_allocator_->AllocateShared(size, attr);
  }

  std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
    chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
    auto* allocation = chunks_.back().get();
    return std::make_shared<AlignedAllocator<64u>>(
        NaiveManagedAllocator::Create(
            std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
  }
  bool IsAllocThreadSafe() const override { return true; }

 private:
  size_t max_chunk_size_;
  std::vector<std::unique_ptr<Allocation>> chunks_;
  std::shared_ptr<ManagedAllocator> raw_allocator_;
  std::shared_ptr<ManagedAllocator> default_allocator_;
};
#endif

class AllocatorFacadePrivate {
 public:
  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;

  ~AllocatorFacadePrivate() = default;

  AllocatorFacadePrivate() {
    InitCPUAllocator();
    InitCUDAAllocator();
    WrapZeroSizeAllocator();
  }

 private:
  void InitCPUAllocator() {
    allocators_[platform::CPUPlace()] = std::make_shared<CPUManagedAllocator>();
  }

  void InitCUDAAllocator() {
#ifdef PADDLE_WITH_CUDA
    for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) {
      allocators_[platform::CUDAPlace(dev_id)] =
          std::make_shared<CUDAManagedAllocator>(dev_id);
    }
#endif
  }

  void WrapZeroSizeAllocator() {
    for (auto& pair : allocators_) {
      pair.second =
          std::make_shared<ZeroSizeAllocator>(pair.second, pair.first);
    }
  }
};

// Pimpl. Keeps the interface clean.
AllocatorFacade::AllocatorFacade() : m_(new AllocatorFacadePrivate()) {}
AllocatorFacade::~AllocatorFacade() { delete m_; }

AllocatorFacade& AllocatorFacade::Instance() {
  static AllocatorFacade instance;
  return instance;
}

std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
    const platform::Place& place, size_t size, Allocator::Attr attr) {
  return m_->allocators_[place]->AllocateShared(size, attr);
}

std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
                                                   size_t size,
                                                   Allocator::Attr attr) {
  return m_->allocators_[place]->Allocate(size, attr);
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

@@ -0,0 +1,57 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace memory {
namespace allocation {

// Allocator Facade is the interface exposed to other modules.
// All the configuration or dirty code under development should
// be hidden behind this facade.
//
// NOTE(yy): This class is a singleton.
// NOTE(yy): To create a stable ABI and make compilation faster, we use the
// Pimpl trick here.
class AllocatorFacadePrivate;
class AllocatorFacade {
 public:
  ~AllocatorFacade();
  AllocatorFacade(const AllocatorFacade& o) = delete;
  const AllocatorFacade& operator=(const AllocatorFacade& o) = delete;

  static AllocatorFacade& Instance();

  // Allocate a shared allocation.
  std::shared_ptr<Allocation> AllocShared(
      const platform::Place& place, size_t size,
      Allocator::Attr attr = Allocator::kDefault);

  // Allocate a unique allocation.
  std::unique_ptr<Allocation> Alloc(const platform::Place& place, size_t size,
                                    Allocator::Attr attr = Allocator::kDefault);

  // TODO(yy): Allocate a Copy-On-Write allocation?
 private:
  AllocatorFacade();
  AllocatorFacadePrivate* m_;
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

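A minimal caller-side sketch (illustrative only, not part of this commit) of the facade declared above; the buffer size and the CPU place are arbitrary.

#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/place.h"

void FacadeDemo() {
  namespace alloc = paddle::memory::allocation;
  paddle::platform::CPUPlace cpu;

  // Unique allocation, owned by the returned unique_ptr.
  auto unique_buf = alloc::AllocatorFacade::Instance().Alloc(cpu, 4096);

  // Shared allocation, owned by the returned shared_ptr.
  auto shared_buf = alloc::AllocatorFacade::Instance().AllocShared(cpu, 4096);

  void* data = unique_buf->ptr();  // raw pointer into the buffer
  (void)data;
  (void)shared_buf;
}
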
@@ -0,0 +1,39 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

std::unique_ptr<Allocation> AutoIncrementAllocator::Allocate(
    size_t size, Allocator::Attr attr) {
  return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) {
    return allocator.Allocate(size, attr);
  });
}

std::shared_ptr<Allocation> AutoIncrementAllocator::AllocateShared(
    size_t size, Allocator::Attr attr) {
  return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) {
    return allocator.AllocateShared(size, attr);
  });
}

bool AutoIncrementAllocator::IsAllocThreadSafe() const { return true; }

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

@@ -0,0 +1,99 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <functional>
#include <memory>
#include <mutex>   // NOLINT
#include <thread>  // NOLINT
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

// The AutoIncrementAllocator manages many underlying allocators. If none of
// them can allocate the requested memory, a new allocator is created and its
// `Allocate` method is invoked.
//
// NOTE(yy): The AutoIncrementAllocator prefers to allocate memory from the
// latest successful allocator.
//
// NOTE(yy): We may need to release an underlying allocator if it allocates
// nothing. However, that is generally not useful, since it would make
// performance unpredictable.
//
// NOTE(yy): This allocator is only locked when creating a new underlying
// allocator. Allocation requests from many threads may be dispatched to the
// same underlying allocator, so the underlying allocator must be thread safe.
class AutoIncrementAllocator : public ManagedAllocator {
 public:
  // Creator is the callback used to create a new ManagedAllocator.
  using AllocatorCreator = std::function<std::shared_ptr<ManagedAllocator>()>;

  explicit AutoIncrementAllocator(AllocatorCreator&& creator)
      : creator_(std::move(creator)), prev_success_allocator_{0} {}
  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override;
  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
  bool IsAllocThreadSafe() const override;

 private:
  // NOTE: a template Callback is used here so that it can be inlined with -O3.
  template <typename Callback>
  inline typename std::result_of<Callback(ManagedAllocator&)>::type
  InvokeOrCreateUnderlyingAllocator(Callback callback) {
    size_t retry_count = underlying_allocators_.size();
    auto cur = prev_success_allocator_;
    while (retry_count-- > 0) {  // until the retry count reaches zero
      try {
        auto res = callback(*underlying_allocators_[cur]);
        {
          std::lock_guard<std::mutex> guard(mtx_);
          prev_success_allocator_ = cur;
        }
        return std::move(res);
      } catch (BadAlloc&) {
        ++cur;
        if (cur >= underlying_allocators_.size()) {
          cur = 0;
        }
      } catch (...) {
        // if it is another type of exception, just rethrow it.
        throw;
      }
    }
    // No suitable allocator
    {
      std::lock_guard<std::mutex> guard(mtx_);
      underlying_allocators_.emplace_back(creator_());
      prev_success_allocator_ = underlying_allocators_.size() - 1;
      PADDLE_ENFORCE(
          underlying_allocators_[prev_success_allocator_]->IsAllocThreadSafe(),
          "the underlying allocator must be thread safe. This is a program "
          "bug.");

      return callback(*underlying_allocators_[prev_success_allocator_]);
    }
  }

  AllocatorCreator creator_;
  std::vector<AllocatorCreator::result_type> underlying_allocators_;
  size_t prev_success_allocator_{0};
  std::mutex mtx_;  // NOLINT
};
}  // namespace allocation
}  // namespace memory
}  // namespace paddle

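A hypothetical construction sketch (not from this commit) showing the creator callback. The facade code earlier in this diff wires this up with BestFitAllocatorCreator; here a CPU-backed creator stands in as a placeholder, and whether the thread-safety enforce passes depends on what the wrapped allocator reports.

#include <memory>
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"

void AutoIncrementDemo() {
  using namespace paddle::memory::allocation;

  // The creator is called only when every existing underlying allocator has
  // thrown BadAlloc for the current request.
  AutoIncrementAllocator auto_inc([]() -> std::shared_ptr<ManagedAllocator> {
    return NaiveManagedAllocator::Create(
        std::unique_ptr<Allocator>(new CPUAllocator()));
  });

  auto allocation = auto_inc.Allocate(1 << 20, Allocator::kDefault);
  (void)allocation;
}
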
@@ -0,0 +1,169 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include <cmath>
#include <cstdint>
#include <list>
#include <map>
#include <string>

namespace paddle {
namespace memory {
namespace allocation {

static int HighestBitPos(size_t N) {
  if (UNLIKELY(N == 0)) {
    return 0;
  } else {
    // NOTE: we could use __builtin_clz in GCC here.
    // However, let's use std::log2 for better readability
    // and trust std::log2's performance.
    return static_cast<int>(std::log2(N) + 1);
  }
}

BestFitAllocator::BestFitAllocator(Allocation* allocation)
    : allocation_(allocation) {
  details::Chunk chunk;
  chunk.size_ = allocation_->size();
  chunk.offset_ = 0;
  chunk.is_free = true;
  chunks_.emplace_back(chunk);
  free_chunks_[HighestBitPos(chunk.size_)].insert(
      {chunk.size_, chunks_.begin()});
}

std::unique_ptr<Allocation> BestFitAllocator::Allocate(size_t size, Attr attr) {
  auto highest_set_bit = static_cast<size_t>(HighestBitPos(size));
  MapIt map_it;
  for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) {
    map_it = free_chunks_[highest_set_bit].lower_bound(size);
    if (map_it != free_chunks_[highest_set_bit].end()) {
      break;
    }
  }
  if (UNLIKELY(highest_set_bit == free_chunks_.size())) {
    throw BadAlloc(string::Sprintf(
        "Cannot allocate %d, all fragments size is %d", size, FreeSize()));
  }
  auto chunk_it = SplitChunk(size, highest_set_bit, map_it);
  return std::unique_ptr<Allocation>(new BestFitAllocation(this, chunk_it));
}

size_t BestFitAllocator::FreeSize() const {
  size_t acc = 0;
  for (auto& array_item : free_chunks_) {
    for (auto& pair : array_item) {
      acc += pair.second->size_;
    }
  }
  return acc;
}

BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
                                                      size_t free_chunk_offset,
                                                      MapIt bin_iterator) {
  auto to_split_it = bin_iterator->second;
  free_chunks_[free_chunk_offset].erase(bin_iterator);

  PADDLE_ENFORCE(to_split_it->is_free);
  PADDLE_ENFORCE_GE(to_split_it->size_, request_size);

  auto remaining_size = to_split_it->size_ - request_size;
  details::Chunk to_use;
  details::Chunk remaining;
  to_use.size_ = request_size;
  to_use.is_free = false;
  remaining.size_ = remaining_size;
  remaining.is_free = true;

  // calculate offsets
  to_use.offset_ = to_split_it->offset_;
  remaining.offset_ = to_use.offset_ + to_use.size_;

  // insert into the chunk list
  auto to_use_it = chunks_.insert(to_split_it, to_use);
  if (remaining.size_ != 0) {
    auto bit_size = static_cast<size_t>(HighestBitPos(remaining.size_));
    free_chunks_[bit_size].insert(
        {remaining.size_, chunks_.insert(to_split_it, remaining)});
  }
  chunks_.erase(to_split_it);
  return to_use_it;
}

void BestFitAllocator::Free(Allocation* allocation) {
  auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
  auto chunk_it = bf_allocation->ChunkIterator();
  PADDLE_ENFORCE(!chunk_it->is_free);
  chunk_it->is_free = true;
  if (chunk_it != chunks_.begin()) {
    auto prev_it = chunk_it;
    --prev_it;

    if (prev_it->is_free) {
      // Merge Left.
      EraseFreeNode(prev_it);
      prev_it->size_ += chunk_it->size_;
      chunks_.erase(chunk_it);
      chunk_it = prev_it;
    }
  }

  auto next_it = chunk_it;
  ++next_it;
  if (next_it != chunks_.end() && next_it->is_free) {
    EraseFreeNode(next_it);
    chunk_it->size_ += next_it->size_;
    chunks_.erase(next_it);
  }

  InsertFreeNode(chunk_it);
}

void BestFitAllocator::InsertFreeNode(const ListIt& it) {
  auto pos = static_cast<size_t>(HighestBitPos(it->size_));
  auto& free_map = free_chunks_[pos];
  free_map.insert({it->size_, it});
}
void BestFitAllocator::EraseFreeNode(const ListIt& it) {
  size_t pos = static_cast<size_t>(HighestBitPos(it->size_));
  auto& free_map = free_chunks_[pos];
  auto map_it = free_map.find(it->size_);
  // Check against the end iterator before dereferencing.
  while (map_it != free_map.end() && map_it->second != it) {
    ++map_it;
  }
  PADDLE_ENFORCE(map_it != free_map.end());
  free_map.erase(map_it);
}
size_t BestFitAllocator::NumFreeChunks() const {
  size_t num = 0;
  for (auto& array_item : free_chunks_) {
    num += array_item.size();
  }
  return num;
}

BestFitAllocation::BestFitAllocation(
    paddle::memory::allocation::BestFitAllocator* allocator,
    typename details::ChunkList::iterator chunk_it)
    : Allocation(reinterpret_cast<void*>(
                     reinterpret_cast<uintptr_t>(allocator->BasePtr()) +
                     chunk_it->offset_),
                 chunk_it->size_, allocator->Place()),
      allocator_(allocator),
      chunk_it_(chunk_it) {}
}  // namespace allocation
}  // namespace memory
}  // namespace paddle

@@ -0,0 +1,132 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <array>
#include <list>
#include <map>
#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {
namespace details {
struct Chunk {
  bool is_free{true};
  // Offset to the base allocation.
  uintptr_t offset_;
  size_t size_;
};

// Here we use std::list to maintain the chunk list.
// NOTE(yy): The traditional implementation of a ChunkList adds `prev`/`next`
// pointers to `Chunk` and splits the allocation into a `ChunkHeader` and a
// `Payload`, such as
//     *-------*---------------*---------------*--------------*
//     | Chunk | prev_ pointer | next_ pointer | payload .... |
//     *-------*---------------*---------------*--------------*
// That implementation can just return a raw pointer and recover the list
// structure from it. However, we cannot use the same code on the GPU, since
// the CPU cannot access GPU memory directly.
//
// So we choose to use `std::list` and return an allocation instance which
// contains the list node iterator; then we can unify the CPU/GPU code.
//
// Returning an allocation is not a bad idea, since Tensor/Vector should hold
// an allocation instead of a raw pointer directly.
using ChunkList = std::list<Chunk>;

// Here we use a multi-level map of free chunks.
// The map is
//     MSB offset --> size --> [ChunkList::iterator]
//
// The time complexities:
//     find a free chunk:
//         O(logN),
//         where N is the number of free nodes with the same MSB offset.
//     find the position of a chunk iterator:
//         O(logN + K),
//         where N is the number of free nodes with the same MSB offset,
//         and K is the number of free nodes with the same size.
//     insert a free chunk:
//         O(logN),
//         where N is the number of free nodes with the same MSB offset.
//     erase a free chunk:
//         O(1)
using FreeChunkBin =
    std::array<std::multimap<size_t, ChunkList::iterator>, sizeof(size_t) * 8>;
}  // namespace details

class BestFitAllocator;

// The BestFitAllocation maintains the list node iterator.
class BestFitAllocation : public Allocation {
 private:
  using ListIt = typename details::ChunkList::iterator;

 public:
  BestFitAllocation(BestFitAllocator* allocator, ListIt chunk_it);

  const ListIt& ChunkIterator() const { return chunk_it_; }

 private:
  BestFitAllocator* allocator_;
  typename details::ChunkList::iterator chunk_it_;
};

// TODO(yy): The current BestFitAllocator is not thread-safe. To make it
// thread safe, we must wrap it with a locked_allocator. However, we could
// implement a thread-safe allocator by locking each bin and the chunk list
// independently, which would make BestFitAllocator faster in multi-threaded
// situations.
//
// This allocator implements a best-fit strategy with merging of free nodes.
//
// To allocate a buffer, it finds the best-fit chunk. If the best-fit chunk is
// larger than the requested size, the original block is split into two
// chunks: the first block is used and the second block is put back into the
// free chunks.
//
// To free an allocation, it marks the allocation's chunk as free and merges
// it with the previous chunk and the next chunk when possible.
class BestFitAllocator : public UnmanagedAllocator {
 public:
  explicit BestFitAllocator(Allocation* allocation);

  void* BasePtr() const { return allocation_->ptr(); }

  const platform::Place& Place() const { return allocation_->place(); }

  std::unique_ptr<Allocation> Allocate(size_t size,
                                       Attr attr = kDefault) override;
  void Free(Allocation* allocation) override;

  size_t NumFreeChunks() const;

 private:
  size_t FreeSize() const;
  using MapIt = typename details::FreeChunkBin::value_type::iterator;
  using ListIt = typename details::ChunkList::iterator;

  ListIt SplitChunk(size_t request_size, size_t free_chunk_offset,
                    MapIt bin_iterator);
  void EraseFreeNode(const ListIt& it);
  void InsertFreeNode(const ListIt& it);

  Allocation* allocation_;  // not owned
  details::ChunkList chunks_;
  details::FreeChunkBin free_chunks_;
};
}  // namespace allocation
}  // namespace memory
}  // namespace paddle

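A standalone sketch (not from this commit) of the bin index used by FreeChunkBin: a chunk of size N is filed under the position of its most significant bit, mirroring HighestBitPos in best_fit_allocator.cc.

#include <cmath>
#include <cstddef>
#include <cstdio>

static int HighestBitPos(std::size_t n) {
  return n == 0 ? 0 : static_cast<int>(std::log2(n) + 1);
}

int main() {
  // Sizes 64 through 127 all land in bin 7, so a lookup for 100 bytes scans
  // bin 7 first and falls through to larger bins when nothing there fits.
  const std::size_t sizes[] = {1, 63, 64, 100, 127, 128};
  for (std::size_t n : sizes) {
    std::printf("size %zu -> bin %d\n", n, HighestBitPos(n));
  }
  return 0;
}
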
@@ -0,0 +1,144 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include <random>
#include <thread>  // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

class StubAllocation : public Allocation {
 public:
  explicit StubAllocation(size_t size)
      : Allocation(0, size, platform::CPUPlace()) {}
};

TEST(BestFitAllocator, test_allocation) {
  StubAllocation stub(4UL * 1024 * 1024 * 1024);
  BestFitAllocator allocator(&stub);
  {
    auto allocation = allocator.Allocate(64);
    allocator.FreeUniquePtr(std::move(allocation));
  }

  {
    auto allocation = allocator.Allocate(80);

    {
      auto best_fit_allocation =
          dynamic_cast<BestFitAllocation*>(allocation.get());
      ASSERT_NE(best_fit_allocation, nullptr);
      ASSERT_FALSE(best_fit_allocation->ChunkIterator()->is_free);
      ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 0);
      ASSERT_EQ(allocation->size(), 80);
      ASSERT_EQ(allocation->ptr(), nullptr);
    }

    auto allocation2 = allocator.Allocate(60);
    auto allocation3 = allocator.Allocate(90);
    allocator.FreeUniquePtr(std::move(allocation2));
    allocation2 = allocator.Allocate(30);

    {
      auto best_fit_allocation =
          dynamic_cast<BestFitAllocation*>(allocation2.get());
      ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80);
    }
    allocator.FreeUniquePtr(std::move(allocation2));

    allocation2 = allocator.Allocate(60);

    {
      auto best_fit_allocation =
          dynamic_cast<BestFitAllocation*>(allocation2.get());
      ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80);
    }

    allocator.FreeUniquePtr(std::move(allocation));
    allocator.FreeUniquePtr(std::move(allocation2));

    allocation = allocator.Allocate(80 + 60);
    {
      auto best_fit_allocation =
          dynamic_cast<BestFitAllocation*>(allocation.get());
      ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 0);
    }

    allocator.FreeUniquePtr(std::move(allocation));

    allocation = allocator.Allocate(80);
    allocation2 = allocator.Allocate(60);
    allocator.FreeUniquePtr(std::move(allocation));
    allocator.FreeUniquePtr(std::move(allocation3));
    allocator.FreeUniquePtr(std::move(allocation2));

    ASSERT_EQ(allocator.NumFreeChunks(), 1U);
  }
}

TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
  CPUAllocator allocator;
  auto global_allocation = allocator.Allocate(256UL * 1024 * 1024);

  std::unique_ptr<Allocator> best_fit_allocator(
      new BestFitAllocator(global_allocation.get()));

  LockedAllocator locked_allocator(std::move(best_fit_allocator));

  auto th_main = [&] {
    std::random_device dev;
    std::default_random_engine engine(dev());
    std::uniform_int_distribution<size_t> dist(1U, 1024U);

    for (size_t i = 0; i < 128; ++i) {
      size_t allocate_size = dist(engine);

      auto allocation =
          locked_allocator.Allocate(sizeof(size_t) * allocate_size);

      size_t* data = reinterpret_cast<size_t*>(allocation->ptr());

      for (size_t j = 0; j < allocate_size; ++j) {
        data[j] = j;
      }
      std::this_thread::yield();

      for (size_t j = 0; j < allocate_size; ++j) {
        ASSERT_EQ(data[j], j);
      }

      locked_allocator.FreeUniquePtr(std::move(allocation));
    }
  };
  {
    std::vector<std::thread> threads;
    for (size_t i = 0; i < 1024; ++i) {
      threads.emplace_back(th_main);
    }
    for (auto& th : threads) {
      th.join();
    }
  }

  allocator.FreeUniquePtr(std::move(global_allocation));
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

@@ -0,0 +1,88 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <random>
#include <thread>  // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace memory {
namespace allocation {

struct ForEachFill {
  size_t* ptr_;

  explicit ForEachFill(size_t* ptr) : ptr_(ptr) {}

  __device__ void operator()(size_t i) { ptr_[i] = i; }
};

TEST(BestFitAllocator, concurrent_cuda) {
  CUDAAllocator allocator(platform::CUDAPlace(0));
  // 256 MB
  auto cuda_allocation = allocator.Allocate(256U * 1024 * 1024);
  LockedAllocator concurrent_allocator(
      std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get())));

  auto th_main = [&] {
    std::random_device dev;
    std::default_random_engine engine(dev());
    std::uniform_int_distribution<size_t> dist(1U, 1024U);
    platform::CUDAPlace gpu(0);
    platform::CUDADeviceContext dev_ctx(gpu);
    std::array<size_t, 1024> buf;
    for (size_t i = 0; i < 128; ++i) {
      size_t allocate_size = dist(engine);

      auto allocation =
          concurrent_allocator.Allocate(sizeof(size_t) * allocate_size);

      size_t* data = reinterpret_cast<size_t*>(allocation->ptr());

      ForEachFill fill(data);
      platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx,
                                                                allocate_size);
      for_range(fill);

      memory::Copy(platform::CPUPlace(), buf.data(), gpu, data,
                   sizeof(size_t) * allocate_size, dev_ctx.stream());

      dev_ctx.Wait();
      for (size_t j = 0; j < allocate_size; ++j) {
        ASSERT_EQ(buf[j], j);
      }

      concurrent_allocator.FreeUniquePtr(std::move(allocation));
    }
  };

  {
    std::vector<std::thread> threads;
    for (size_t i = 0; i < 1024; ++i) {
      threads.emplace_back(th_main);
    }
    for (auto& th : threads) {
      th.join();
    }
  }
  allocator.FreeUniquePtr(std::move(cuda_allocation));
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

@@ -0,0 +1,43 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/conditional_allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

ConditionalAllocator& ConditionalAllocator::AddAllocator(
    std::function<bool(size_t, Allocator::Attr)> func,
    std::shared_ptr<ManagedAllocator> allocator) {
  underlying_allocators_.emplace_back(std::move(func), std::move(allocator));
  return *this;
}
std::unique_ptr<Allocation> ConditionalAllocator::Allocate(
    size_t size, Allocator::Attr attr) {
  return SelectAndInvoke(size, attr, [&](ManagedAllocator& allocator) {
    return allocator.Allocate(size, attr);
  });
}
std::shared_ptr<Allocation> ConditionalAllocator::AllocateShared(
    size_t size, Allocator::Attr attr) {
  return SelectAndInvoke(size, attr, [&](ManagedAllocator& allocator) {
    return allocator.AllocateShared(size, attr);
  });
}
bool ConditionalAllocator::IsAllocThreadSafe() const { return true; }

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

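A hypothetical routing sketch (not from this commit) that mirrors how the facade code above chains predicates; the 1 MB threshold and the two CPU-backed allocators are placeholders.

#include <memory>
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"

void ConditionalDemo() {
  using namespace paddle::memory::allocation;

  std::shared_ptr<ManagedAllocator> small_requests =
      NaiveManagedAllocator::Create(
          std::unique_ptr<Allocator>(new CPUAllocator()));
  std::shared_ptr<ManagedAllocator> fallback = NaiveManagedAllocator::Create(
      std::unique_ptr<Allocator>(new CPUAllocator()));

  ConditionalAllocator router;
  router
      .AddAllocator(
          [](size_t size, Allocator::Attr) { return size < (1u << 20); },
          small_requests)
      .AddAllocator([](size_t, Allocator::Attr) { return true; },  // default case
                    fallback);

  auto a = router.Allocate(4096, Allocator::kDefault);  // matches the first predicate
  (void)a;
}
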
Some files were not shown because too many files have changed in this diff.