You cannot select more than 25 topics.
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							78 lines
						
					
					
						
							2.8 KiB
						
					
					
				
			
		
		
	
	
							78 lines
						
					
					
						
							2.8 KiB
						
					
					
				| // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //     http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| #include "paddle/fluid/memory/allocation/thread_local_allocator.h"
 | |
| 
 | |
| namespace paddle {
 | |
| namespace memory {
 | |
| namespace allocation {
 | |
| 
 | |
| ThreadLocalAllocatorImpl::ThreadLocalAllocatorImpl(const platform::Place& p)
 | |
|     : place_(p) {
 | |
|   if (platform::is_gpu_place(place_)) {
 | |
|     buddy_allocator_.reset(new memory::detail::BuddyAllocator(
 | |
|         std::unique_ptr<memory::detail::SystemAllocator>(
 | |
|             new memory::detail::GPUAllocator(
 | |
|                 BOOST_GET_CONST(platform::CUDAPlace, place_).device)),
 | |
|         platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()));
 | |
|   } else {
 | |
|     PADDLE_THROW(platform::errors::Unavailable(
 | |
|         "Thread local allocator only supports CUDAPlace now."));
 | |
|   }
 | |
| }
 | |
| 
 | |
| std::shared_ptr<ThreadLocalAllocatorImpl> ThreadLocalCUDAAllocatorPool::Get(
 | |
|     int gpu_id) {
 | |
|   auto pos = std::distance(devices_.begin(),
 | |
|                            std::find(devices_.begin(), devices_.end(), gpu_id));
 | |
|   PADDLE_ENFORCE_LT(
 | |
|       pos, devices_.size(),
 | |
|       platform::errors::InvalidArgument(
 | |
|           "The position of device should be less than the size of devices."));
 | |
|   std::call_once(*init_flags_[pos], [this, pos, gpu_id] {
 | |
|     platform::SetDeviceId(devices_[pos]);
 | |
|     allocators_[pos].reset(
 | |
|         new ThreadLocalAllocatorImpl(platform::CUDAPlace(gpu_id)));
 | |
|   });
 | |
|   return allocators_[pos];
 | |
| }
 | |
| 
 | |
| ThreadLocalCUDAAllocatorPool::ThreadLocalCUDAAllocatorPool()
 | |
|     : devices_(platform::GetSelectedDevices()) {
 | |
|   auto gpu_num = devices_.size();
 | |
|   allocators_.resize(gpu_num);
 | |
|   init_flags_.reserve(gpu_num);
 | |
|   for (size_t i = 0; i < gpu_num; ++i) {
 | |
|     init_flags_.emplace_back(new std::once_flag());
 | |
|   }
 | |
| }
 | |
| 
 | |
| ThreadLocalAllocation* ThreadLocalAllocatorImpl::AllocateImpl(size_t size) {
 | |
|   VLOG(10) << "ThreadLocalAllocatorImpl::AllocateImpl " << size;
 | |
|   void* ptr = buddy_allocator_->Alloc(size);
 | |
|   auto* tl_allocation = new ThreadLocalAllocation(ptr, size, place_);
 | |
|   tl_allocation->SetThreadLocalAllocatorImpl(shared_from_this());
 | |
|   return tl_allocation;
 | |
| }
 | |
| 
 | |
| void ThreadLocalAllocatorImpl::FreeImpl(ThreadLocalAllocation* allocation) {
 | |
|   VLOG(10) << "ThreadLocalAllocatorImpl::FreeImpl " << allocation;
 | |
|   buddy_allocator_->Free(allocation->ptr());
 | |
|   delete allocation;
 | |
| }
 | |
| 
 | |
| }  // namespace allocation
 | |
| }  // namespace memory
 | |
| }  // namespace paddle
 |