You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							156 lines
						
					
					
						
							5.2 KiB
						
					
					
				
			
		
		
	
	
							156 lines
						
					
					
						
							5.2 KiB
						
					
					
				| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | |
| 
 | |
| Licensed under the Apache License, Version 2.0 (the "License");
 | |
| you may not use this file except in compliance with the License.
 | |
| You may obtain a copy of the License at
 | |
| 
 | |
|     http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
| Unless required by applicable law or agreed to in writing, software
 | |
| distributed under the License is distributed on an "AS IS" BASIS,
 | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| See the License for the specific language governing permissions and
 | |
| limitations under the License. */
 | |
| 
 | |
| #include "paddle/fluid/platform/gpu_info.h"
 | |
| 
 | |
| #include <algorithm>
 | |
| 
 | |
| #include "gflags/gflags.h"
 | |
| #include "paddle/fluid/platform/enforce.h"
 | |
| 
 | |
| DEFINE_double(fraction_of_gpu_memory_to_use, 0.92,
 | |
|               "Default use 92% of GPU memory for PaddlePaddle,"
 | |
|               "reserve the rest for page tables, etc");
 | |
| 
 | |
| namespace paddle {
 | |
| namespace platform {
 | |
| 
 | |
| int GetCUDADeviceCount() {
 | |
|   int count;
 | |
|   PADDLE_ENFORCE(
 | |
|       cudaGetDeviceCount(&count),
 | |
|       "cudaGetDeviceCount failed in paddle::platform::GetCUDADeviceCount");
 | |
|   return count;
 | |
| }
 | |
| 
 | |
| int GetCUDAComputeCapability(int id) {
 | |
|   PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
 | |
|   cudaDeviceProp device_prop;
 | |
|   PADDLE_ENFORCE(cudaGetDeviceProperties(&device_prop, id),
 | |
|                  "cudaGetDeviceProperties failed in "
 | |
|                  "paddle::platform::GetCUDAComputeCapability");
 | |
|   return device_prop.major * 10 + device_prop.minor;
 | |
| }
 | |
| 
 | |
| int GetCUDAMultiProcessors(int id) {
 | |
|   PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
 | |
|   int count;
 | |
|   PADDLE_ENFORCE(
 | |
|       cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id),
 | |
|       "cudaDeviceGetAttribute failed in "
 | |
|       "paddle::platform::GetCUDAMultiProcessors");
 | |
|   return count;
 | |
| }
 | |
| 
 | |
| int GetCUDAMaxThreadsPerMultiProcessor(int id) {
 | |
|   PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
 | |
|   int count;
 | |
|   PADDLE_ENFORCE(cudaDeviceGetAttribute(
 | |
|                      &count, cudaDevAttrMaxThreadsPerMultiProcessor, id),
 | |
|                  "cudaDeviceGetAttribute failed in "
 | |
|                  "paddle::platform::GetCUDAMaxThreadsPerMultiProcessor");
 | |
|   return count;
 | |
| }
 | |
| 
 | |
| int GetCurrentDeviceId() {
 | |
|   int device_id;
 | |
|   PADDLE_ENFORCE(
 | |
|       cudaGetDevice(&device_id),
 | |
|       "cudaGetDevice failed in paddle::platform::GetCurrentDeviceId");
 | |
|   return device_id;
 | |
| }
 | |
| 
 | |
| void SetDeviceId(int id) {
 | |
|   // TODO(qijun): find a better way to cache the cuda device count
 | |
|   PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
 | |
|   PADDLE_ENFORCE(cudaSetDevice(id),
 | |
|                  "cudaSetDevice failed in paddle::platform::SetDeviceId");
 | |
| }
 | |
| 
 | |
| void GpuMemoryUsage(size_t *available, size_t *total) {
 | |
|   PADDLE_ENFORCE(cudaMemGetInfo(available, total),
 | |
|                  "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
 | |
| }
 | |
| 
 | |
| size_t GpuMaxAllocSize() {
 | |
|   size_t total = 0;
 | |
|   size_t available = 0;
 | |
| 
 | |
|   GpuMemoryUsage(&available, &total);
 | |
| 
 | |
|   // Reserve the rest for page tables, etc.
 | |
|   return static_cast<size_t>(total * FLAGS_fraction_of_gpu_memory_to_use);
 | |
| }
 | |
| 
 | |
// Returns the minimum chunk size that may be allocated: 256 bytes.
size_t GpuMinChunkSize() {
  const size_t min_chunk_bytes = static_cast<size_t>(1) << 8;  // 2^8 == 256
  return min_chunk_bytes;
}
 | |
| 
 | |
| size_t GpuMaxChunkSize() {
 | |
|   size_t total = 0;
 | |
|   size_t available = 0;
 | |
| 
 | |
|   GpuMemoryUsage(&available, &total);
 | |
|   VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/"
 | |
|            << total / 1024 / 1024 << "M";
 | |
|   size_t reserving = static_cast<size_t>(0.05 * total);
 | |
|   // If available less than minimum chunk size, no usable memory exists.
 | |
|   available =
 | |
|       std::min(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(),
 | |
|                total - reserving);
 | |
| 
 | |
|   // Reserving the rest memory for page tables, etc.
 | |
| 
 | |
|   size_t allocating = static_cast<size_t>(FLAGS_fraction_of_gpu_memory_to_use *
 | |
|                                           (total - reserving));
 | |
| 
 | |
|   PADDLE_ENFORCE_LE(allocating, available);
 | |
| 
 | |
|   return allocating;
 | |
| }
 | |
| 
 | |
| void GpuMemcpyAsync(void *dst, const void *src, size_t count,
 | |
|                     enum cudaMemcpyKind kind, cudaStream_t stream) {
 | |
|   PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream),
 | |
|                  "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync");
 | |
| }
 | |
| 
 | |
| void GpuMemcpySync(void *dst, const void *src, size_t count,
 | |
|                    enum cudaMemcpyKind kind) {
 | |
|   PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind),
 | |
|                  "cudaMemcpy failed in paddle::platform::GpuMemcpySync");
 | |
| }
 | |
| 
 | |
| void GpuMemcpyPeerAsync(void *dst, int dst_device, const void *src,
 | |
|                         int src_device, size_t count, cudaStream_t stream) {
 | |
|   PADDLE_ENFORCE(
 | |
|       cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream),
 | |
|       "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync");
 | |
| }
 | |
| 
 | |
| void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
 | |
|                        int src_device, size_t count) {
 | |
|   PADDLE_ENFORCE(
 | |
|       cudaMemcpyPeer(dst, dst_device, src, src_device, count),
 | |
|       "cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync");
 | |
| }
 | |
| 
 | |
| void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
 | |
|   PADDLE_ENFORCE(cudaMemsetAsync(dst, value, count, stream),
 | |
|                  "cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync");
 | |
| }
 | |
| }  // namespace platform
 | |
| }  // namespace paddle
 |