Fix SetDevice on init

7 years ago · 8e3fdc6e65
parent 524f6e9b36
commit 8e3fdc6e65
7 changed files with 52 additions and 6 deletions
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@ -42,3 +42,5 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
        naive_managed_allocator
        aligned_allocator
        cuda_device_guard)
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
--- a/paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
+++ b/paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
@ -0,0 +1,45 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "unsupported/Eigen/CXX11/Tensor"
 // Functor passed to platform::ForRange in the test below: for each index it
 // is invoked with, it stores 0.0f into the corresponding slot of ptr_.
 struct FillZero {
  float* ptr_;  // destination buffer; set via aggregate init, FillZero{ptr}
  __device__ void operator()(size_t idx) {
    ptr_[idx] = 0.0f;
  }
 };
 namespace paddle {
 // Smoke test: allocates a 10x10 float tensor on GPU 0, then touches the same
 // buffer three ways -- a raw cudaMemset, a ForRange launch of FillZero, and an
 // Eigen expression evaluated on the context's Eigen device. The test has no
 // value assertions; it passes when none of these steps reports an error.
 TEST(Eigen, main) {
  // Element count of the {10, 10} tensor allocated below; named once so the
  // memset size and the ForRange limit cannot drift apart.
  const size_t kNumel = 10 * 10;
  framework::Tensor tensor;
  platform::CUDAPlace gpu(0);
  float* ptr = tensor.mutable_data<float>({10, 10}, gpu);
  // Downcast the pooled context with static_cast: this is a conversion within
  // a class hierarchy, not a reinterpretation of the pointer's bits.
  auto& dev_ctx = *static_cast<platform::CUDADeviceContext*>(
      platform::DeviceContextPool::Instance().Get(gpu));
  PADDLE_ENFORCE(cudaMemset(ptr, 0, sizeof(float) * kNumel));
  platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx, kNumel);
  for_range(FillZero{ptr});
  dev_ctx.Wait();
  auto eigen_vec = framework::EigenVector<float>::Flatten(tensor);
  auto& eigen_dev = *dev_ctx.eigen_device();
  eigen_vec.device(eigen_dev) = eigen_vec.constant(0.0f);
  // Synchronize once more so that an error raised while evaluating the Eigen
  // expression is reported by this test instead of going unnoticed when the
  // test body returns.
  dev_ctx.Wait();
 }
 }  // namespace paddle
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@ -46,7 +46,6 @@ class AllocatorFacadePrivate {
  }
  // Default constructor: prints "Init Allocator Facade" to stdout, then runs
  // InitCPUAllocator() followed by InitCUDAAllocator().
  // NOTE(review): this hunk's header reports one removed line but the diff
  // markers are not visible here -- confirm which line the commit drops.
  AllocatorFacadePrivate() {
    std::cout << "Init Allocator Facade" << std::endl;
    InitCPUAllocator();
    InitCUDAAllocator();
  }
--- a/paddle/fluid/memory/allocation/cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/cuda_allocator.cc
@ -31,7 +31,6 @@ std::unique_ptr<Allocation> CUDAAllocator::Allocate(size_t size, Attr attr) {
        "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
        status, cudaGetErrorString(status)));
  }
  return std::unique_ptr<Allocation>(
      new CUDAAllocation(ptr, size, platform::Place(place_)));
 }
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@ -72,7 +72,7 @@ cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col)
 cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding)
 if(WITH_GPU)
    nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function)
-    nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function)
+    nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function)
 endif()
 cc_test(concat_test SRCS concat_test.cc DEPS concat)
 cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@ -9,11 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/platform/device_context.h"
 #include <set>
 #include <string>
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/memory/memory.h"
 #ifdef PADDLE_WITH_CUDA
@ -205,7 +205,7 @@ class CudnnHolder {
 CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
    : place_(place), cudnn_holder_(nullptr) {
-  SetDeviceId(place_.device);
+  CUDADeviceGuard guard(place_.device);
  compute_capability = GetCUDAComputeCapability(place_.device);
  multi_process = GetCUDAMultiProcessors(place_.device);
  max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device);
--- a/paddle/fluid/platform/init.cc
+++ b/paddle/fluid/platform/init.cc
@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/cpu_helper.h"
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/init.h"
 #include "paddle/fluid/platform/place.h"
@ -64,7 +65,7 @@ void InitP2P(std::vector<int> devices) {
          LOG(WARNING) << "Cannot enable P2P access from " << devices[i]
                       << " to " << devices[j];
        } else {
-          cudaSetDevice(devices[i]);
+          platform::CUDADeviceGuard guard(devices[i]);
          cudaDeviceEnablePeerAccess(devices[j], 0);
        }
      }