Fix SetDevice on init

panyx0718-patch-1
Yu Yang 7 years ago
parent 524f6e9b36
commit 8e3fdc6e65

@ -42,3 +42,5 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
naive_managed_allocator naive_managed_allocator
aligned_allocator aligned_allocator
cuda_device_guard) cuda_device_guard)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)

@ -0,0 +1,45 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#include "unsupported/Eigen/CXX11/Tensor"
// Functor callable from device code: invoking it with an index stores 0.0f
// into that element of the buffer referenced by ptr_.
struct FillZero {
  // Destination buffer. The functor does not allocate or free it.
  float* ptr_;

  // Writes a single zero at position `idx`; no bounds check is performed.
  __device__ void operator()(size_t idx) {
    ptr_[idx] = 0.0f;
  }
};
namespace paddle {

// Smoke test: allocates a 10x10 float tensor on GPU 0 and zeroes it three
// ways -- cudaMemset, a ForRange launch of FillZero, and an Eigen device
// expression. The buffer contents are never read back, so the test passes
// when none of these steps reports an error.
TEST(Eigen, main) {
  // Element count of the {10, 10} tensor allocated below.
  constexpr size_t kNumel = 10 * 10;

  framework::Tensor tensor;
  platform::CUDAPlace gpu(0);
  float* ptr = tensor.mutable_data<float>({10, 10}, gpu);

  // Downcast the pooled context with static_cast rather than
  // reinterpret_cast, so the compiler verifies the two types are related.
  // NOTE(review): assumes DeviceContextPool::Get returns the DeviceContext
  // base pointer -- confirm against device_context.h.
  auto& dev_ctx = *static_cast<platform::CUDADeviceContext*>(
      platform::DeviceContextPool::Instance().Get(gpu));

  PADDLE_ENFORCE(cudaMemset(ptr, 0, sizeof(float) * kNumel));

  platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx, kNumel);
  for_range(FillZero{ptr});
  dev_ctx.Wait();

  auto eigen_vec = framework::EigenVector<float>::Flatten(tensor);
  auto& eigen_dev = *dev_ctx.eigen_device();
  eigen_vec.device(eigen_dev) = eigen_vec.constant(0.0f);
  // Wait once more before `tensor` goes out of scope. The Eigen assignment
  // is presumably enqueued asynchronously on the context's stream, so
  // without this the test could return with that work still pending.
  dev_ctx.Wait();
}

}  // namespace paddle

@ -46,7 +46,6 @@ class AllocatorFacadePrivate {
} }
AllocatorFacadePrivate() { AllocatorFacadePrivate() {
std::cout << "Init Allocator Facade" << std::endl;
InitCPUAllocator(); InitCPUAllocator();
InitCUDAAllocator(); InitCUDAAllocator();
} }

@ -31,7 +31,6 @@ std::unique_ptr<Allocation> CUDAAllocator::Allocate(size_t size, Attr attr) {
"Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device, "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
status, cudaGetErrorString(status))); status, cudaGetErrorString(status)));
} }
return std::unique_ptr<Allocation>( return std::unique_ptr<Allocation>(
new CUDAAllocation(ptr, size, platform::Place(place_))); new CUDAAllocation(ptr, size, platform::Place(place_)));
} }

@ -72,7 +72,7 @@ cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col)
cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding)
if(WITH_GPU) if(WITH_GPU)
nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function)
nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function) nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function)
endif() endif()
cc_test(concat_test SRCS concat_test.cc DEPS concat) cc_test(concat_test SRCS concat_test.cc DEPS concat)
cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info) cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)

@ -9,11 +9,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include <set> #include <set>
#include <string> #include <string>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
@ -205,7 +205,7 @@ class CudnnHolder {
CUDADeviceContext::CUDADeviceContext(CUDAPlace place) CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
: place_(place), cudnn_holder_(nullptr) { : place_(place), cudnn_holder_(nullptr) {
SetDeviceId(place_.device); CUDADeviceGuard guard(place_.device);
compute_capability = GetCUDAComputeCapability(place_.device); compute_capability = GetCUDAComputeCapability(place_.device);
multi_process = GetCUDAMultiProcessors(place_.device); multi_process = GetCUDAMultiProcessors(place_.device);
max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device); max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device);

@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
@ -64,7 +65,7 @@ void InitP2P(std::vector<int> devices) {
LOG(WARNING) << "Cannot enable P2P access from " << devices[i] LOG(WARNING) << "Cannot enable P2P access from " << devices[i]
<< " to " << devices[j]; << " to " << devices[j];
} else { } else {
cudaSetDevice(devices[i]); platform::CUDADeviceGuard guard(devices[i]);
cudaDeviceEnablePeerAccess(devices[j], 0); cudaDeviceEnablePeerAccess(devices[j], 0);
} }
} }

Loading…
Cancel
Save