You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
78 lines
2.8 KiB
78 lines
2.8 KiB
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "paddle/fluid/memory/allocation/thread_local_allocator.h"
|
|
|
|
namespace paddle {
|
|
namespace memory {
|
|
namespace allocation {
|
|
|
|
// Constructs a thread-local allocator bound to a single place.
// For a GPU place, requests are served by a BuddyAllocator layered on top of
// a raw GPUAllocator for that device; any other place is rejected.
ThreadLocalAllocatorImpl::ThreadLocalAllocatorImpl(const platform::Place& p)
    : place_(p) {
  // Guard clause: only CUDAPlace is supported by this allocator.
  if (!platform::is_gpu_place(place_)) {
    PADDLE_THROW(platform::errors::Unavailable(
        "Thread local allocator only supports CUDAPlace now."));
  }
  buddy_allocator_.reset(new memory::detail::BuddyAllocator(
      std::unique_ptr<memory::detail::SystemAllocator>(
          new memory::detail::GPUAllocator(
              BOOST_GET_CONST(platform::CUDAPlace, place_).device)),
      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()));
}
|
|
|
|
std::shared_ptr<ThreadLocalAllocatorImpl> ThreadLocalCUDAAllocatorPool::Get(
|
|
int gpu_id) {
|
|
auto pos = std::distance(devices_.begin(),
|
|
std::find(devices_.begin(), devices_.end(), gpu_id));
|
|
PADDLE_ENFORCE_LT(
|
|
pos, devices_.size(),
|
|
platform::errors::InvalidArgument(
|
|
"The position of device should be less than the size of devices."));
|
|
std::call_once(*init_flags_[pos], [this, pos, gpu_id] {
|
|
platform::SetDeviceId(devices_[pos]);
|
|
allocators_[pos].reset(
|
|
new ThreadLocalAllocatorImpl(platform::CUDAPlace(gpu_id)));
|
|
});
|
|
return allocators_[pos];
|
|
}
|
|
|
|
// Sets up one (initially empty) allocator slot plus a once-flag for every
// selected GPU device; the slots are filled lazily by Get().
ThreadLocalCUDAAllocatorPool::ThreadLocalCUDAAllocatorPool()
    : devices_(platform::GetSelectedDevices()) {
  const auto device_count = devices_.size();
  allocators_.resize(device_count);
  // once_flag is neither copyable nor movable, so each one is heap-allocated
  // and the vector is pre-reserved to avoid reallocation.
  init_flags_.reserve(device_count);
  for (size_t idx = 0; idx < device_count; ++idx) {
    init_flags_.emplace_back(new std::once_flag());
  }
}
|
|
|
|
// Carves `size` bytes out of this thread's buddy allocator and wraps the raw
// pointer in a ThreadLocalAllocation that keeps this allocator alive (via
// shared_from_this) until the allocation is freed.
ThreadLocalAllocation* ThreadLocalAllocatorImpl::AllocateImpl(size_t size) {
  VLOG(10) << "ThreadLocalAllocatorImpl::AllocateImpl " << size;
  auto* raw_ptr = buddy_allocator_->Alloc(size);
  auto* wrapped = new ThreadLocalAllocation(raw_ptr, size, place_);
  wrapped->SetThreadLocalAllocatorImpl(shared_from_this());
  return wrapped;
}
|
|
|
|
// Returns the underlying memory to the buddy allocator, then destroys the
// allocation wrapper itself.
void ThreadLocalAllocatorImpl::FreeImpl(ThreadLocalAllocation* allocation) {
  VLOG(10) << "ThreadLocalAllocatorImpl::FreeImpl " << allocation;
  auto* underlying = allocation->ptr();
  buddy_allocator_->Free(underlying);
  delete allocation;
}
|
|
|
|
} // namespace allocation
|
|
} // namespace memory
|
|
} // namespace paddle
|