|
|
|
@ -198,9 +198,9 @@ class CudnnHolder {
|
|
|
|
|
CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
|
|
|
|
|
: place_(place), cudnn_holder_(nullptr) {
|
|
|
|
|
SetDeviceId(place_.device);
|
|
|
|
|
compute_capability = GetCUDAComputeCapability(place_.device);
|
|
|
|
|
multi_process = GetCUDAMultiProcessors(place_.device);
|
|
|
|
|
max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device);
|
|
|
|
|
compute_capability_ = GetCUDAComputeCapability(place_.device);
|
|
|
|
|
multi_process_ = GetCUDAMultiProcessors(place_.device);
|
|
|
|
|
max_threads_per_mp_ = GetCUDAMaxThreadsPerMultiProcessor(place_.device);
|
|
|
|
|
PADDLE_ENFORCE(cudaStreamCreate(&stream_));
|
|
|
|
|
eigen_stream_.reset(new EigenCudaStreamDevice());
|
|
|
|
|
eigen_stream_->Reinitialize(&stream_, place);
|
|
|
|
@ -211,6 +211,16 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
|
|
|
|
|
cudnn_holder_.reset(new CudnnHolder(&stream_, place));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
driver_version_ = GetCUDADriverVersion(place_.device);
|
|
|
|
|
runtime_version_ = GetCUDARuntimeVersion(place_.device);
|
|
|
|
|
|
|
|
|
|
LOG(INFO) << "device: " << place_.device
|
|
|
|
|
<< ", CUDA Capability: " << compute_capability_
|
|
|
|
|
<< ", Driver Version: " << driver_version_ / 1000 << "."
|
|
|
|
|
<< (driver_version_ % 100) / 10
|
|
|
|
|
<< ", Runtime Version: " << runtime_version_ / 1000 << "."
|
|
|
|
|
<< (runtime_version_ % 100) / 10;
|
|
|
|
|
|
|
|
|
|
callback_manager_.reset(new StreamCallbackManager(stream_));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -232,11 +242,11 @@ void CUDADeviceContext::Wait() const {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int CUDADeviceContext::GetComputeCapability() const {
|
|
|
|
|
return compute_capability;
|
|
|
|
|
return compute_capability_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int CUDADeviceContext::GetMaxPhysicalThreadCount() const {
|
|
|
|
|
return multi_process * max_threads_per_mp;
|
|
|
|
|
return multi_process_ * max_threads_per_mp_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Eigen::GpuDevice* CUDADeviceContext::eigen_device() const {
|
|
|
|
|