refactor(memory): rewrite memory allocation and make it extensible

Rewrite memory allocation in an object-oriented (OO) style.
panyx0718-patch-1
Yu Yang 6 years ago
parent 643b6faa0c
commit 58ed412f68
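
For readers skimming the diff: after this refactor, callers no longer pair memory::Alloc with memory::Free and a PODDeleter; they receive RAII Allocation objects from a per-place facade. A minimal sketch of the new call pattern, assuming the AllocatorFacade interface introduced below and assuming the managed wrappers release the memory when the smart pointer is destroyed:

#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"

// Hedged sketch of the new allocation style (illustrative only).
void AllocationExample(const paddle::platform::Place& place) {
  using paddle::memory::allocation::AllocatorFacade;
  // Unique ownership; the managed allocator is assumed to free the
  // memory when `a` goes out of scope.
  auto a = AllocatorFacade::Instance().Alloc(place, 1024);
  void* data = a->ptr();  // raw pointer into the allocation
  (void)data;
  // Shared ownership, as Tensor::holder_ now uses (see tensor.cc below).
  auto h = AllocatorFacade::Instance().AllocShared(place, 2048);
  (void)h;
}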

@ -30,6 +30,8 @@ class ExceptionHolder {
Catch(exp);
} catch (platform::EnforceNotMet exp) {
Catch(exp);
} catch (std::exception& ex) {
LOG(FATAL) << "std::exception caught, " << ex.what();
} catch (...) {
LOG(FATAL) << "Unknown exception caught";
}

@ -395,11 +395,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
if (!erase_tensors.empty()) gc->Add(erase_tensors);
}
}
if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_);
}
}
if (gc != nullptr) {
@ -421,13 +416,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
scope->DropKids();
}
}
if (FLAGS_benchmark) {
VLOG(2) << "-------------------------------------------------------";
VLOG(2) << "Memory used after deleting local scope: "
<< memory::memory_usage(place_);
VLOG(2) << "-------------------------------------------------------";
}
}
void Executor::RunPreparedContext(

@ -111,9 +111,6 @@ class LoDTensor : public Tensor {
public:
LoDTensor() : Tensor() {}
/* Constructor with place should only be used in pybind */
explicit LoDTensor(const platform::Place& place) : Tensor(place) {}
explicit LoDTensor(const LoD& lod) : lod_(lod) {}
void set_lod(const LoD& lod) { lod_ = lod; }

@ -23,6 +23,7 @@
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
#include "glog/logging.h"
@ -31,46 +32,6 @@ namespace paddle {
namespace framework {
#if defined(PADDLE_WITH_CUDA)
namespace details {
struct CUDABuffer {
void *data_{nullptr};
size_t size_{0};
platform::CUDAPlace place_;
CUDABuffer() {}
CUDABuffer(platform::Place place, size_t size)
: size_(size), place_(boost::get<platform::CUDAPlace>(place)) {
data_ = memory::Alloc(place_, size);
}
~CUDABuffer() { ClearMemory(); }
CUDABuffer(const CUDABuffer &o) = delete;
CUDABuffer &operator=(const CUDABuffer &o) = delete;
void Resize(platform::Place place, size_t size) {
ClearMemory();
place_ = boost::get<platform::CUDAPlace>(place);
data_ = memory::Alloc(place_, size);
PADDLE_ENFORCE_NOT_NULL(data_);
size_ = size;
}
void Swap(CUDABuffer &o) {
std::swap(data_, o.data_);
std::swap(place_, o.place_);
std::swap(size_, o.size_);
}
private:
void ClearMemory() const {
if (data_ != nullptr) {
memory::Free(place_, data_);
}
}
};
} // namespace details
// Vector<T> implements the std::vector interface, and its Data or
// MutableData can be fetched from any place. The data is synced implicitly inside.
template <typename T>
@ -103,8 +64,6 @@ class Vector {
o.ImmutableCPU();
cpu_ = o.cpu_;
flag_ = kDataInCPU;
details::CUDABuffer null;
gpu_.Swap(null);
return *this;
}
@ -199,7 +158,7 @@ class Vector {
PADDLE_ENFORCE(platform::is_gpu_place(place),
"CUDA Data must on CUDA place");
ImmutableCUDA(place);
return reinterpret_cast<T *>(gpu_.data_);
return reinterpret_cast<T *>(gpu_->ptr());
}
// get cuda ptr. mutable
@ -234,13 +193,11 @@ class Vector {
std::mutex &Mutex() const { return mtx_; }
std::unique_ptr<platform::CUDAPlace> CUDAPlace() const {
if (gpu_.data_ == nullptr) {
return nullptr;
} else {
return std::unique_ptr<platform::CUDAPlace>(
new platform::CUDAPlace(gpu_.place_));
}
boost::optional<platform::CUDAPlace> CUDAPlace() const {
return gpu_ == nullptr
? boost::none
: boost::optional<platform::CUDAPlace>(
boost::get<platform::CUDAPlace>(gpu_->place()));
}
private:
@ -254,13 +211,12 @@ class Vector {
void CopyToCPU() const {
// COPY GPU Data To CPU
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(
platform::Place(gpu_.place_)));
platform::DeviceContextPool::Instance().Get(gpu_->place()));
auto stream = dev_ctx->stream();
void *src = gpu_.data_;
void *src = gpu_->ptr();
void *dst = cpu_.data();
memory::Copy(platform::CPUPlace(), dst, gpu_.place_, src, gpu_.size_,
stream);
memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
gpu_->size(), stream);
dev_ctx->Wait();
}
@ -277,8 +233,7 @@ class Vector {
CopyCPUDataToCUDA(place);
UnsetFlag(kDirty);
SetFlag(kDataInCUDA);
} else if (IsInCUDA() &&
!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
} else if (IsInCUDA() && !(place == gpu_->place())) {
PADDLE_THROW("This situation should not happen");
// Still dirty
} else {
@ -290,7 +245,7 @@ class Vector {
// Even data is not dirty. However, data is not in CUDA. Copy data.
CopyCPUDataToCUDA(place);
SetFlag(kDataInCUDA);
} else if (!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
} else if (!(place == gpu_->place())) {
PADDLE_THROW("This situation should not happen.");
} else {
// Not Dirty && DataInCUDA && Device is same
@ -301,13 +256,13 @@ class Vector {
void CopyCPUDataToCUDA(const platform::Place &place) const {
void *src = cpu_.data();
gpu_.Resize(place, cpu_.size() * sizeof(T));
void *dst = gpu_.data_;
gpu_ = memory::Alloc(place, cpu_.size() * sizeof(T));
void *dst = gpu_->ptr();
auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
auto stream = dev_ctx->stream();
memory::Copy(gpu_.place_, dst, platform::CPUPlace(), src, gpu_.size_,
stream);
memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
gpu_->size(), stream);
}
void ImmutableCPU() const {
@ -329,7 +284,7 @@ class Vector {
bool IsInCPU() const { return flag_ & kDataInCPU; }
mutable std::vector<T> cpu_;
mutable details::CUDABuffer gpu_;
mutable std::unique_ptr<memory::Allocation> gpu_;
mutable int flag_;
mutable std::mutex mtx_;
@ -428,8 +383,8 @@ class Vector {
auto &mtx = m_.Data().Mutex();
std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_.Data().CUDAPlace();
if (cuda_place == nullptr ||
*cuda_place == boost::get<platform::CUDAPlace>(place)) {
if (cuda_place == boost::none ||
cuda_place == boost::get<platform::CUDAPlace>(place)) {
return m_.Data().CUDAData(place);
}
}
@ -444,8 +399,8 @@ class Vector {
auto &mtx = m_.Data().Mutex();
std::lock_guard<std::mutex> guard(mtx);
auto cuda_place = m_.Data().CUDAPlace();
if (cuda_place == nullptr ||
*cuda_place == boost::get<platform::CUDAPlace>(place)) {
if (cuda_place == boost::none ||
cuda_place == boost::get<platform::CUDAPlace>(place)) {
return m_.MutableData()->CUDAMutableData(place);
}
}
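
The Vector<T> comment above promises implicit CPU/GPU syncing; a hedged usage sketch, assuming the interface shown in this hunk and a CUDA build:

#include "paddle/fluid/framework/mixed_vector.h"

// Illustrative only: Vector<T> keeps a CPU copy plus an on-demand GPU copy.
void VectorSyncExample() {
  paddle::framework::Vector<int64_t> v;
  v.push_back(1);  // mutates the CPU copy and marks it dirty
  v.push_back(2);
  paddle::platform::CUDAPlace gpu(0);
  // The first device access allocates gpu_ via memory::Alloc and copies
  // the CPU data over (CopyCPUDataToCUDA in the hunk above); later
  // accesses reuse it until the CPU copy is dirtied again.
  const int64_t* dev_ptr = v.CUDAData(gpu);
  (void)dev_ptr;
}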

@ -33,9 +33,7 @@ size_t Tensor::memory_size() const {
void* Tensor::mutable_data(platform::Place place, std::type_index type,
size_t requested_size) {
if (holder_ != nullptr) {
holder_->set_type(type);
}
type_ = type;
PADDLE_ENFORCE_GE(numel(), 0,
"When calling this method, the Tensor's numel must be "
"equal or larger than zero. "
@ -48,25 +46,7 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type,
/* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), size, type));
} else if (platform::is_gpu_place(place) ||
platform::is_cuda_pinned_place(place)) {
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW(
"CUDAPlace or CUDAPinnedPlace is not supported in CPU-only mode.");
}
#else
if (platform::is_gpu_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CUDAPlace>(
boost::get<platform::CUDAPlace>(place), size, type));
} else if (platform::is_cuda_pinned_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CUDAPinnedPlace>(
boost::get<platform::CUDAPinnedPlace>(place), size, type));
}
}
#endif
holder_ = memory::AllocShared(place, size);
offset_ = 0;
}
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
@ -76,7 +56,7 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type,
void* Tensor::mutable_data(platform::Place place, size_t requested_size) {
PADDLE_ENFORCE(this->holder_ != nullptr,
"Cannot invoke mutable data if current hold nothing.");
return mutable_data(place, holder_->type(), requested_size);
return mutable_data(place, type_, requested_size);
}
Tensor& Tensor::ShareDataWith(const Tensor& src) {
@ -101,6 +81,7 @@ Tensor Tensor::Slice(int begin_idx, int end_idx) const {
Tensor dst;
dst.holder_ = holder_;
dst.set_layout(layout_);
dst.type_ = type_;
DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx;
dst.Resize(dst_dims);

@ -67,12 +67,7 @@ class Tensor {
friend struct EigenVector;
public:
Tensor() : offset_(0) {}
/*! Constructor with place should only be used in pybind. */
explicit Tensor(const platform::Place& place) : offset_(0) {
holder_->set_place(place);
}
Tensor() : type_(typeid(float)), offset_(0) {}
/*! Return a pointer to mutable memory block. */
template <typename T>
@ -139,7 +134,7 @@ class Tensor {
std::type_index type() const {
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor not initialized yet when Tensor::type() is called.");
return holder_->type();
return type_;
}
// memory size returns the holding memory size in byte.
@ -154,55 +149,9 @@ class Tensor {
void clear() { holder_ = nullptr; }
private:
/**
* @note Placeholder hides type T, so it doesn't appear as a template
* parameter of Variable.
*/
struct Placeholder {
virtual ~Placeholder() = default;
virtual void* ptr() const = 0;
virtual size_t size() const = 0;
virtual std::type_index type() const = 0;
virtual platform::Place place() const = 0;
virtual void set_type(std::type_index type) = 0;
virtual void set_place(platform::Place place) = 0;
};
template <typename Place>
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(Place place, size_t size, std::type_index type)
: ptr_(static_cast<uint8_t*>(memory::Alloc(place, size)),
memory::PODDeleter<uint8_t, Place>(place)),
place_(place),
size_(size),
type_(type) {
PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.",
(is_cpu_place(place_) ? "CPU" : "GPU"));
}
virtual size_t size() const { return size_; }
virtual platform::Place place() const { return place_; }
virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
virtual std::type_index type() const { return type_; }
virtual void set_type(std::type_index type) { type_ = type; }
virtual void set_place(platform::Place place) { place_ = place; }
/*! the pointer of memory block. */
std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t, Place>> ptr_;
/*! the place of memory block. */
platform::Place place_;
/*! the size of memory block. */
size_t size_;
/* the current type of memory */
std::type_index type_;
};
/*! holds the memory block if allocated. */
std::shared_ptr<Placeholder> holder_;
std::shared_ptr<memory::Allocation> holder_;
std::type_index type_;
/**
* @brief points to elements dimensions.
*

@ -23,10 +23,10 @@ namespace framework {
template <typename T>
inline const T* Tensor::data() const {
check_memory_size();
bool valid = std::is_same<T, void>::value ||
holder_->type() == std::type_index(typeid(T));
bool valid =
std::is_same<T, void>::value || type_ == std::type_index(typeid(T));
PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %s",
this->holder_->type().name());
type_.name());
return reinterpret_cast<const T*>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
@ -37,10 +37,10 @@ inline bool Tensor::IsInitialized() const { return holder_ != nullptr; }
template <typename T>
inline T* Tensor::data() {
check_memory_size();
bool valid = std::is_same<T, void>::value ||
holder_->type() == std::type_index(typeid(T));
bool valid =
std::is_same<T, void>::value || type_ == std::type_index(typeid(T));
PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %s",
this->holder_->type().name());
type_.name());
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}

@ -1,15 +1,12 @@
add_subdirectory(detail)
cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce)
add_subdirectory(allocation)
cc_library(malloc SRCS malloc.cc DEPS allocator_facade)
cc_library(memcpy SRCS memcpy.cc DEPS place)
cc_library(memory
DEPS
malloc
memcpy)
cc_test(malloc_test SRCS malloc_test.cc DEPS malloc)
#if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()

@ -0,0 +1,43 @@
cc_library(allocator SRCS allocator.cc DEPS place)
cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator gpu_info)
if (WITH_GPU)
nv_test(best_fit_allocator_test
SRCS best_fit_allocator_test.cc
best_fit_allocator_test.cu
DEPS best_fit_allocator
locked_allocator
cpu_allocator
cuda_allocator
device_context
memcpy)
else()
cc_test(best_fit_allocator_test
SRCS best_fit_allocator_test.cc
DEPS best_fit_allocator
locked_allocator
cpu_allocator)
endif()
cc_library(naive_managed_allocator SRCS naive_managed_allocator.cc DEPS allocator)
cc_test(naive_managed_allocator_test SRCS naive_managed_allocator_test.cc DEPS naive_managed_allocator)
if (WITH_GPU)
set(AllocatorFacadeDeps gpu_info cuda_allocator)
else ()
set(AllocatorFacadeDeps)
endif()
cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(allocator_facade SRCS allocator_facade.cc DEPS
${AllocatorFacadeDeps}
cpu_allocator
locked_allocator
best_fit_allocator
naive_managed_allocator
aligned_allocator)

@ -0,0 +1,26 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
ThinAlignedAllocator::ThinAlignedAllocator(
std::shared_ptr<ManagedAllocator> underlying_allocator)
: underlying_allocator_(std::move(underlying_allocator)) {}
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,68 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
template <size_t kAlignment>
class AlignedAllocation : public Allocation {
public:
AlignedAllocation(std::unique_ptr<Allocation>&& underlying_allocation,
size_t size)
: Allocation(AlignedPtr(underlying_allocation->ptr()), size,
underlying_allocation->place()),
underlying_allocation_(std::move(underlying_allocation)) {}
private:
static void* AlignedPtr(void* ptr) {
auto ptr_addr = reinterpret_cast<uintptr_t>(ptr);
// Round down to the previous kAlignment boundary, then step up one full
// kAlignment (kAlignment must be a power of two); the result is always
// strictly above `ptr`, which is why Allocate() over-allocates by kAlignment.
ptr_addr = (ptr_addr & ~(kAlignment - 1)) + kAlignment;
return reinterpret_cast<void*>(ptr_addr);
}
std::unique_ptr<Allocation> underlying_allocation_;
};
class ThinAlignedAllocator : public ManagedAllocator {
public:
explicit ThinAlignedAllocator(
std::shared_ptr<ManagedAllocator> underlying_allocator);
protected:
std::shared_ptr<ManagedAllocator> underlying_allocator_;
};
template <size_t kAlignment>
class AlignedAllocator : public ThinAlignedAllocator {
public:
using ThinAlignedAllocator::ThinAlignedAllocator;
std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
auto raw_allocation =
underlying_allocator_->Allocate(size + kAlignment, attr);
return std::unique_ptr<Allocation>(
new AlignedAllocation<kAlignment>(std::move(raw_allocation), size));
}
std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
return std::shared_ptr<Allocation>(Allocate(size, attr).release());
}
};
} // namespace allocation
} // namespace memory
} // namespace paddle
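
Note that AlignedPtr always advances the pointer, even when the address is already aligned; that is why Allocate requests size + kAlignment extra bytes. A standalone sketch of the arithmetic (assumes the alignment is a power of two):

#include <cassert>
#include <cstdint>

// Mirror of AlignedPtr's round-up logic, for illustration.
constexpr std::uintptr_t AlignUp(std::uintptr_t addr, std::uintptr_t alignment) {
  // Round down to the previous boundary, then step one full alignment up.
  return (addr & ~(alignment - 1)) + alignment;
}

int main() {
  assert(AlignUp(0x1001, 64) == 0x1040);  // unaligned -> next boundary
  assert(AlignUp(0x1040, 64) == 0x1080);  // already aligned -> still advances
  return 0;
}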

@ -0,0 +1,29 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
Allocation::~Allocation() {}
Allocator::~Allocator() {}
bool Allocator::IsAllocThreadSafe() const { return false; }
const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,93 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace memory {
namespace allocation {
class BadAlloc : public std::exception {
public:
explicit BadAlloc(const std::string& msg) : msg_(msg) {}
const char* what() const noexcept override;
private:
std::string msg_;
};
class Allocation {
public:
Allocation(void* ptr, size_t size, platform::Place place)
: ptr_(ptr), size_(size), place_(place) {}
Allocation(const Allocation& o) = delete;
Allocation& operator=(const Allocation& o) = delete;
void* ptr() const { return ptr_; }
size_t size() const { return size_; }
const platform::Place& place() const { return place_; }
virtual ~Allocation();
private:
void* ptr_;
size_t size_;
platform::Place place_;
};
class Allocator {
public:
enum Attr {
kDefault = 0,
kTiny = 1,
kFixedHuge = 2,
kFluxHuge = 3,
kTmp = 4,
NumOfAttrs = 5
};
virtual ~Allocator();
virtual std::unique_ptr<Allocation> Allocate(
size_t size, Allocator::Attr attr = kDefault) = 0;
virtual bool IsAllocThreadSafe() const;
};
// Users need to invoke `Free` or `FreeUniquePtr` manually if the allocation
// comes from a manually managed allocator.
class UnmanagedAllocator : public Allocator {
public:
virtual void Free(Allocation* allocation) = 0;
void FreeUniquePtr(std::unique_ptr<Allocation> allocation) {
Free(allocation.get());
}
};
// The allocation will be managed by smart pointers
class ManagedAllocator : public Allocator {
public:
virtual std::shared_ptr<Allocation> AllocateShared(
size_t size, Allocator::Attr attr = kDefault) = 0;
};
} // namespace allocation
} // namespace memory
} // namespace paddle
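
This interface is where the commit's "extensible" claim lives: a new backend only has to implement Allocate/Free. A hypothetical backend, not part of this commit, sketched against the classes above:

#include <cstdlib>
#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

// Hypothetical example: a plain malloc/free backend for the CPU place.
// The real backends below (cpu_allocator.*, cuda_allocator.*) follow the
// same shape.
class MallocAllocator : public UnmanagedAllocator {
 public:
  std::unique_ptr<Allocation> Allocate(size_t size,
                                       Attr attr = kDefault) override {
    void* ptr = std::malloc(size);
    if (ptr == nullptr) {
      throw BadAlloc("malloc failed");
    }
    return std::unique_ptr<Allocation>(
        new Allocation(ptr, size, platform::CPUPlace()));
  }
  void Free(Allocation* allocation) override { std::free(allocation->ptr()); }
  bool IsAllocThreadSafe() const override { return true; }
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle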

@ -0,0 +1,102 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/allocator.h"
#include <map>
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
#endif
namespace paddle {
namespace memory {
namespace allocation {
class AllocatorFacadePrivate {
public:
std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
std::vector<std::unique_ptr<Allocation>> pre_allocations_;
std::vector<std::shared_ptr<Allocator>> holding_allocators_;
~AllocatorFacadePrivate() {
// Specify destruct order.
pre_allocations_.clear();
allocators_.clear();
holding_allocators_.clear();
}
AllocatorFacadePrivate() {
InitCPUAllocator();
InitCUDAAllocator();
}
private:
void InitCPUAllocator() {
auto all = NaiveManagedAllocator::Create(
std::unique_ptr<Allocator>(new CPUAllocator()));
allocators_[platform::CPUPlace()] = all;
}
void InitCUDAAllocator() {
#ifdef PADDLE_WITH_CUDA
for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) {
auto cuda_allocator =
NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
new CUDAAllocator(platform::CUDAPlace(dev_id))));
auto allocation = cuda_allocator->Allocate(platform::GpuMaxChunkSize());
auto allocator = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
new LockedAllocator(std::unique_ptr<Allocator>(
new BestFitAllocator(allocation.get())))));
pre_allocations_.emplace_back(std::move(allocation));
holding_allocators_.emplace_back(cuda_allocator);
allocators_[platform::CUDAPlace(dev_id)] =
std::make_shared<AlignedAllocator<64>>(std::move(allocator));
}
#endif
}
};
AllocatorFacade::AllocatorFacade() : m_(new AllocatorFacadePrivate()) {}
AllocatorFacade::~AllocatorFacade() { delete m_; }
AllocatorFacade& AllocatorFacade::Instance() {
static AllocatorFacade instance;
return instance;
}
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size, Allocator::Attr attr) {
return m_->allocators_.at(place)->AllocateShared(size, attr);
}
std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
size_t size,
Allocator::Attr attr) {
return m_->allocators_.at(place)->Allocate(size, attr);
}
} // namespace allocation
} // namespace memory
} // namespace paddle
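
InitCUDAAllocator above composes five layers per device. Here is the same stack unrolled into a helper, a sketch that mirrors the code above and assumes the includes already present in allocator_facade.cc; `keep_alive` stands in for `pre_allocations_`, and the real facade additionally retains the raw CUDA allocator in `holding_allocators_`, which this sketch omits:

// Sketch: the per-device allocator stack built by InitCUDAAllocator.
std::shared_ptr<ManagedAllocator> BuildCUDAStack(
    int dev_id, std::vector<std::unique_ptr<Allocation>>* keep_alive) {
  auto cuda = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
      new CUDAAllocator(platform::CUDAPlace(dev_id))));
  // One large chunk is carved out of the device up front...
  auto chunk = cuda->Allocate(platform::GpuMaxChunkSize());
  Allocation* chunk_ptr = chunk.get();
  keep_alive->emplace_back(std::move(chunk));  // chunk must outlive the stack
  // ...and every later request is a best-fit sub-allocation within it,
  // serialized by LockedAllocator (BestFitAllocator is not thread-safe),
  // then wrapped for smart-pointer ownership and 64-byte alignment.
  auto managed = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
      new LockedAllocator(std::unique_ptr<Allocator>(
          new BestFitAllocator(chunk_ptr)))));
  return std::make_shared<AlignedAllocator<64>>(std::move(managed));
}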

@ -0,0 +1,47 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace memory {
namespace allocation {
class AllocatorFacadePrivate;
class AllocatorFacade {
public:
~AllocatorFacade();
AllocatorFacade(const AllocatorFacade& o) = delete;
const AllocatorFacade& operator=(const AllocatorFacade& o) = delete;
static AllocatorFacade& Instance();
std::shared_ptr<Allocation> AllocShared(
const platform::Place& place, size_t size,
Allocator::Attr attr = Allocator::kDefault);
std::unique_ptr<Allocation> Alloc(const platform::Place& place, size_t size,
Allocator::Attr attr = Allocator::kDefault);
private:
AllocatorFacade();
AllocatorFacadePrivate* m_;
};
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,169 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include <cmath>
#include <list>
#include <map>
#include <string>
namespace paddle {
namespace memory {
namespace allocation {
static int HighestBitPos(size_t N) {
if (UNLIKELY(N == 0)) {
return 0;
} else {
// NOTE: here we can use __builtin_clz in GCC.
// However, let's use std::log2 for better readability
// and trust std::log2's performance.
return static_cast<int>(std::log2(N) + 1);
}
}
BestFitAllocator::BestFitAllocator(Allocation* allocation)
: allocation_(allocation) {
details::Chunk chunk;
chunk.size_ = allocation_->size();
chunk.offset_ = 0;
chunk.is_free = true;
chunks_.emplace_back(chunk);
free_chunks_[HighestBitPos(chunk.size_)].insert(
{chunk.size_, chunks_.begin()});
}
std::unique_ptr<Allocation> BestFitAllocator::Allocate(size_t size, Attr attr) {
auto highest_set_bit = static_cast<size_t>(HighestBitPos(size));
MapIt map_it;
for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) {
map_it = free_chunks_[highest_set_bit].lower_bound(size);
if (map_it != free_chunks_[highest_set_bit].end()) {
break;
}
}
if (UNLIKELY(highest_set_bit == free_chunks_.size())) {
throw BadAlloc(string::Sprintf(
"Cannot allocate %d; the total size of all free fragments is %d", size, FreeSize()));
}
auto chunk_it = SplitChunk(size, highest_set_bit, map_it);
return std::unique_ptr<Allocation>(new BestFitAllocation(this, chunk_it));
}
size_t BestFitAllocator::FreeSize() const {
size_t acc = 0;
for (auto& array_item : free_chunks_) {
for (auto& pair : array_item) {
acc += pair.second->size_;
}
}
return acc;
}
BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
size_t free_chunk_offset,
MapIt bin_iterator) {
auto to_split_it = bin_iterator->second;
free_chunks_[free_chunk_offset].erase(bin_iterator);
PADDLE_ENFORCE(to_split_it->is_free);
PADDLE_ENFORCE_GE(to_split_it->size_, request_size);
auto remaining_size = to_split_it->size_ - request_size;
details::Chunk to_use;
details::Chunk remaining;
to_use.size_ = request_size;
to_use.is_free = false;
remaining.size_ = remaining_size;
remaining.is_free = true;
// calc offsets
to_use.offset_ = to_split_it->offset_;
remaining.offset_ = to_use.offset_ + to_use.size_;
// insert to chunk list
auto to_use_it = chunks_.insert(to_split_it, to_use);
if (remaining.size_ != 0) {
auto bit_size = static_cast<size_t>(HighestBitPos(remaining.size_));
free_chunks_[bit_size].insert(
{remaining.size_, chunks_.insert(to_split_it, remaining)});
}
chunks_.erase(to_split_it);
return to_use_it;
}
void BestFitAllocator::Free(Allocation* allocation) {
auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
auto chunk_it = bf_allocation->ChunkIterator();
PADDLE_ENFORCE(!chunk_it->is_free);
chunk_it->is_free = true;
if (chunk_it != chunks_.begin()) {
auto prev_it = chunk_it;
--prev_it;
if (prev_it->is_free) {
// Merge Left.
EraseFreeNode(prev_it);
prev_it->size_ += chunk_it->size_;
chunks_.erase(chunk_it);
chunk_it = prev_it;
}
}
auto next_it = chunk_it;
++next_it;
if (next_it != chunks_.end() && next_it->is_free) {
EraseFreeNode(next_it);
chunk_it->size_ += next_it->size_;
chunks_.erase(next_it);
}
InsertFreeNode(chunk_it);
}
void BestFitAllocator::InsertFreeNode(const ListIt& it) {
auto pos = static_cast<size_t>(HighestBitPos(it->size_));
auto& free_map = free_chunks_[pos];
free_map.insert({it->size_, it});
}
void BestFitAllocator::EraseFreeNode(const ListIt& it) {
size_t pos = static_cast<size_t>(HighestBitPos(it->size_));
auto& free_map = free_chunks_[pos];
auto map_it = free_map.find(it->size_);
while (map_it != free_map.end() && map_it->second != it) {
++map_it;
}
PADDLE_ENFORCE(map_it != free_map.end());
free_map.erase(map_it);
}
size_t BestFitAllocator::NumFreeChunks() const {
size_t num = 0;
for (auto& array_item : free_chunks_) {
num += array_item.size();
}
return num;
}
BestFitAllocation::BestFitAllocation(
paddle::memory::allocation::BestFitAllocator* allocator,
typename details::ChunkList::iterator chunk_it)
: Allocation(reinterpret_cast<void*>(
reinterpret_cast<uintptr_t>(allocator->BasePtr()) +
chunk_it->offset_),
chunk_it->size_, allocator->Place()),
allocator_(allocator),
chunk_it_(chunk_it) {}
} // namespace allocation
} // namespace memory
} // namespace paddle
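
The free bins above are indexed by HighestBitPos; a worked example of the bin math (standalone sketch):

#include <cassert>
#include <cmath>
#include <cstddef>

// Standalone copy of the bin-index function used above.
static int HighestBitPos(std::size_t n) {
  return n == 0 ? 0 : static_cast<int>(std::log2(n) + 1);
}

int main() {
  assert(HighestBitPos(1) == 1);      // log2(1) = 0      -> bin 1
  assert(HighestBitPos(300) == 9);    // log2(300) ~ 8.23 -> bin 9
  assert(HighestBitPos(1024) == 11);  // log2(1024) = 10  -> bin 11
  // Allocate(size) starts at bin HighestBitPos(size) and walks upward,
  // doing lower_bound(size) in each bin's size-ordered multimap.
  return 0;
}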

@ -0,0 +1,132 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <array>
#include <list>
#include <map>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
namespace details {
struct Chunk {
bool is_free{true};
// Offset to the base allocation.
uintptr_t offset_;
size_t size_;
};
// Here we use std::list to maintain the chunk list.
// NOTE(yy): The traditional implementation of ChunkList is to add
// `prev`/`next` pointers in `Chunk`, and split the allocation into a
// `ChunkHeader` and a `Payload`, such as
// *-------*---------------*---------------*--------------*
// | Chunk | prev_ pointer | next_ pointer | payload .... |
// *-------*---------------*---------------*--------------*
// This implementation can just return a raw pointer, and the list
// structure can be recovered from it. However, we cannot use the same
// code on the GPU, since the CPU cannot access GPU memory directly.
//
// So we choose to use `std::list` and return an allocation instance,
// which contains the list-node iterator, so the CPU/GPU code can be
// unified.
//
// Returning an allocation is not a bad idea, since Tensor/Vector should
// hold an allocation instead of a raw pointer directly.
using ChunkList = std::list<Chunk>;
// Here we use a multi-level map of free chunks.
// the map is
// MSB offset --> size --> [ChunkList::iterator]
//
// The time complexities:
// find a free chunk:
// O(logN),
// where N is the number of free nodes with the same MSB offset.
// find the position of a chunk iterator:
// O(logN + K),
// where N is the number of free nodes with the same MSB offset,
// and K is the number of free nodes with the same size.
// insert a free chunk:
// O(logN),
// where N is the number of free nodes with the same MSB offset.
// erase a free chunk:
// O(1)
using FreeChunkBin =
std::array<std::multimap<size_t, ChunkList::iterator>, sizeof(size_t) * 8>;
} // namespace details
class BestFitAllocator;
// A BestFitAllocation maintains the list-node iterator of its chunk.
class BestFitAllocation : public Allocation {
private:
using ListIt = typename details::ChunkList::iterator;
public:
BestFitAllocation(BestFitAllocator* allocator, ListIt chunk_it);
const ListIt& ChunkIterator() const { return chunk_it_; }
private:
BestFitAllocator* allocator_;
typename details::ChunkList::iterator chunk_it_;
};
// TODO(yy): The current BestFitAllocator is not thread-safe. To make it
// thread-safe, wrap it in a LockedAllocator. However, a thread-safe
// allocator could also be implemented by locking each bin and the chunk
// list independently, which would make BestFitAllocator faster in
// multi-threaded situations.
//
// This allocator implements the best-fit strategy with free-chunk merging.
//
// To allocate a buffer, it finds the best-fit free chunk. If that chunk is
// larger than the requested size, the chunk is split in two; the first
// part is handed out and the second is returned to the free bins.
//
// To free an allocation, it marks the allocation's chunk as free and
// merges it with the previous and next chunks when possible.
class BestFitAllocator : public UnmanagedAllocator {
public:
explicit BestFitAllocator(Allocation* allocation);
void* BasePtr() const { return allocation_->ptr(); }
const platform::Place& Place() const { return allocation_->place(); }
std::unique_ptr<Allocation> Allocate(size_t size,
Attr attr = kDefault) override;
void Free(Allocation* allocation) override;
size_t NumFreeChunks() const;
private:
size_t FreeSize() const;
using MapIt = typename details::FreeChunkBin::value_type::iterator;
using ListIt = typename details::ChunkList::iterator;
ListIt SplitChunk(size_t request_size, size_t free_chunk_offset,
MapIt bin_iterator);
void EraseFreeNode(const ListIt& it);
void InsertFreeNode(const ListIt& it);
Allocation* allocation_; // not owned
details::ChunkList chunks_;
details::FreeChunkBin free_chunks_;
};
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,144 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
class StubAllocation : public Allocation {
public:
explicit StubAllocation(size_t size)
: Allocation(nullptr, size, platform::CPUPlace()) {}
};
TEST(BestFitAllocator, test_allocation) {
StubAllocation stub(4UL * 1024 * 1024 * 1024);
BestFitAllocator allocator(&stub);
{
auto allocation = allocator.Allocate(64);
allocator.FreeUniquePtr(std::move(allocation));
}
{
auto allocation = allocator.Allocate(80);
{
auto best_fit_allocation =
dynamic_cast<BestFitAllocation*>(allocation.get());
ASSERT_NE(best_fit_allocation, nullptr);
ASSERT_FALSE(best_fit_allocation->ChunkIterator()->is_free);
ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 0);
ASSERT_EQ(allocation->size(), 80);
ASSERT_EQ(allocation->ptr(), nullptr);
}
auto allocation2 = allocator.Allocate(60);
auto allocation3 = allocator.Allocate(90);
allocator.FreeUniquePtr(std::move(allocation2));
allocation2 = allocator.Allocate(30);
{
auto best_fit_allocation =
dynamic_cast<BestFitAllocation*>(allocation2.get());
ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80);
}
allocator.FreeUniquePtr(std::move(allocation2));
allocation2 = allocator.Allocate(60);
{
auto best_fit_allocation =
dynamic_cast<BestFitAllocation*>(allocation2.get());
ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80);
}
allocator.FreeUniquePtr(std::move(allocation));
allocator.FreeUniquePtr(std::move(allocation2));
allocation = allocator.Allocate(80 + 60);
{
auto best_fit_allocation =
dynamic_cast<BestFitAllocation*>(allocation.get());
ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 0);
}
allocator.FreeUniquePtr(std::move(allocation));
allocation = allocator.Allocate(80);
allocation2 = allocator.Allocate(60);
allocator.FreeUniquePtr(std::move(allocation));
allocator.FreeUniquePtr(std::move(allocation3));
allocator.FreeUniquePtr(std::move(allocation2));
ASSERT_EQ(allocator.NumFreeChunks(), 1U);
}
}
TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
CPUAllocator allocator;
auto global_allocation = allocator.Allocate(256UL * 1024 * 1024);
std::unique_ptr<Allocator> best_fit_allocator(
new BestFitAllocator(global_allocation.get()));
LockedAllocator locked_allocator(std::move(best_fit_allocator));
auto th_main = [&] {
std::random_device dev;
std::default_random_engine engine(dev());
std::uniform_int_distribution<size_t> dist(1U, 1024U);
for (size_t i = 0; i < 128; ++i) {
size_t allocate_size = dist(engine);
auto allocation =
locked_allocator.Allocate(sizeof(size_t) * allocate_size);
size_t* data = reinterpret_cast<size_t*>(allocation->ptr());
for (size_t j = 0; j < allocate_size; ++j) {
data[j] = j;
}
std::this_thread::yield();
for (size_t j = 0; j < allocate_size; ++j) {
ASSERT_EQ(data[j], j);
}
locked_allocator.FreeUniquePtr(std::move(allocation));
}
};
{
std::vector<std::thread> threads;
for (size_t i = 0; i < 1024; ++i) {
threads.emplace_back(th_main);
}
for (auto& th : threads) {
th.join();
}
}
allocator.FreeUniquePtr(std::move(global_allocation));
}
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,88 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <thread> // NOLINT
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace memory {
namespace allocation {
struct ForEachFill {
size_t* ptr_;
explicit ForEachFill(size_t* ptr) : ptr_(ptr) {}
__device__ void operator()(size_t i) { ptr_[i] = i; }
};
TEST(BestFitAllocator, concurrent_cuda) {
CUDAAllocator allocator(platform::CUDAPlace(0));
// 256 MB
auto cuda_allocation = allocator.Allocate(256U * 1024 * 1024);
LockedAllocator concurrent_allocator(
std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get())));
auto th_main = [&] {
std::random_device dev;
std::default_random_engine engine(dev());
std::uniform_int_distribution<size_t> dist(1U, 1024U);
platform::CUDAPlace gpu(0);
platform::CUDADeviceContext dev_ctx(gpu);
std::array<size_t, 1024> buf;
for (size_t i = 0; i < 128; ++i) {
size_t allocate_size = dist(engine);
auto allocation =
concurrent_allocator.Allocate(sizeof(size_t) * allocate_size);
size_t* data = reinterpret_cast<size_t*>(allocation->ptr());
ForEachFill fill(data);
platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx,
allocate_size);
for_range(fill);
memory::Copy(platform::CPUPlace(), buf.data(), gpu, data,
sizeof(size_t) * allocate_size, dev_ctx.stream());
dev_ctx.Wait();
for (size_t j = 0; j < allocate_size; ++j) {
ASSERT_EQ(buf[j], j);
}
concurrent_allocator.FreeUniquePtr(std::move(allocation));
}
};
{
std::vector<std::thread> threads;
for (size_t i = 0; i < 1024; ++i) {
threads.emplace_back(th_main);
}
for (auto& th : threads) {
th.join();
}
}
allocator.FreeUniquePtr(std::move(cuda_allocation));
}
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,40 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include <stdlib.h>
#include <string>
namespace paddle {
namespace memory {
namespace allocation {
std::unique_ptr<Allocation> CPUAllocator::Allocate(size_t size, Attr attr) {
void* ptr;
auto status = posix_memalign(&ptr, kAlignment, size);
if (UNLIKELY(status != 0)) {
throw BadAlloc(string::Sprintf("Cannot allocate %d bytes of CPU memory. Errno is %d",
size, status));
}
return std::unique_ptr<Allocation>(new CPUAllocation(ptr, size));
}
void CPUAllocator::Free(Allocation* allocation) {
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation*>(allocation));
free(allocation->ptr());
}
bool CPUAllocator::IsAllocThreadSafe() const { return true; }
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,38 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
class CPUAllocation : public Allocation {
public:
CPUAllocation(void* ptr, size_t size)
: Allocation(ptr, size, platform::CPUPlace()) {}
};
class CPUAllocator : public UnmanagedAllocator {
public:
constexpr static size_t kAlignment = 64u;
std::unique_ptr<Allocation> Allocate(size_t size,
Attr attr = kDefault) override;
void Free(Allocation* allocation) override;
bool IsAllocThreadSafe() const override;
};
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,69 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
#include <cuda.h>
#include <cuda_runtime.h>
#include <string>
#include "paddle/fluid/platform/gpu_info.h"
namespace paddle {
namespace memory {
namespace allocation {
class CUDADeviceGuard {
public:
explicit CUDADeviceGuard(int dev_id) {
int prev_id = platform::GetCurrentDeviceId();
if (prev_id != dev_id) {
prev_id_ = prev_id;
platform::SetDeviceId(dev_id);
}
}
~CUDADeviceGuard() {
if (prev_id_ != -1) {
platform::SetDeviceId(prev_id_);
}
}
private:
int prev_id_{-1};
};
std::unique_ptr<Allocation> CUDAAllocator::Allocate(size_t size, Attr attr) {
CUDADeviceGuard guard(place_.device);
void* ptr;
auto status = cudaMalloc(&ptr, size);
if (UNLIKELY(status != cudaSuccess)) {
throw BadAlloc(string::Sprintf(
"Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
status, cudaGetErrorString(status)));
}
return std::unique_ptr<Allocation>(
new CUDAAllocation(ptr, size, platform::Place(place_)));
}
void CUDAAllocator::Free(Allocation* allocation) {
auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
place_);
PADDLE_ENFORCE(cudaFree(allocation->ptr()));
}
bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,45 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace memory {
namespace allocation {
// Just a flag type.
class CUDAAllocation : public Allocation {
public:
using Allocation::Allocation;
};
class CUDAAllocator : public UnmanagedAllocator {
public:
explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {}
explicit CUDAAllocator(const platform::Place& place)
: place_(boost::get<platform::CUDAPlace>(place)) {}
std::unique_ptr<Allocation> Allocate(size_t size,
Attr attr = kDefault) override;
void Free(Allocation* allocation) override;
bool IsAllocThreadSafe() const override;
private:
platform::CUDAPlace place_;
};
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,49 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/locked_allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
std::unique_ptr<Allocation> LockedAllocator::Allocate(size_t size, Attr attr) {
if (underlying_allocator_->IsAllocThreadSafe()) {
return underlying_allocator_->Allocate(size, attr);
} else {
std::lock_guard<std::mutex> guard(mtx_);
return underlying_allocator_->Allocate(size, attr);
}
}
void LockedAllocator::Free(Allocation *allocation) {
if (underlying_allocator_->IsAllocThreadSafe()) {
return underlying_allocator_->Free(allocation);
} else {
std::lock_guard<std::mutex> guard(mtx_);
return underlying_allocator_->Free(allocation);
}
}
bool LockedAllocator::IsAllocThreadSafe() const { return true; }
LockedAllocator::LockedAllocator(
std::unique_ptr<Allocator> &&underlying_allocator) {
auto *allocator =
dynamic_cast<UnmanagedAllocator *>(underlying_allocator.get());
PADDLE_ENFORCE_NOT_NULL(allocator);
underlying_allocator.release();
underlying_allocator_.reset(allocator);
}
} // namespace allocation
} // namespace memory
} // namespace paddle

@ -0,0 +1,38 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <thread> // NOLINT
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
class LockedAllocator : public UnmanagedAllocator {
public:
explicit LockedAllocator(std::unique_ptr<Allocator>&& underlying_allocator);
std::unique_ptr<Allocation> Allocate(size_t size,
Attr attr = kDefault) override;
void Free(Allocation* allocation) override;
bool IsAllocThreadSafe() const override;
private:
std::unique_ptr<UnmanagedAllocator> underlying_allocator_;
std::mutex mtx_;
};
} // namespace allocation
} // namespace memory
} // namespace paddle

Some files were not shown because too many files have changed in this diff.
