Remove NCCL dependency when the number of GPUs is 1 (#18158)

* Remove NCCL dependency when the number of GPUs is 1
test=develop
revert-18229-add_multi_gpu_install_check
chengduo 6 years ago committed by GitHub
parent 25ab23be28
commit 4978db2c10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -369,8 +369,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
"Execution which can get better performance,"
<< "you can force it off by env FLAGS_enable_parallel_graph=0";
if (member_->use_cuda_) {
// Bcast Parameters to all GPUs
if (member_->use_cuda_ && member_->nranks_ > 1) {
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
member_->InitOrGetNCCLCommunicator(scope, build_strategy);
@ -405,10 +404,11 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
}
return false;
};
// Bcast Parameters to all GPUs
if (need_broadcast()) {
BCastParamsToDevices(bcast_vars, build_strategy.trainer_id_);
}
// Startup Program has been run. All local scopes have correct parameters.
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert

@ -316,7 +316,9 @@ CUDADeviceContext::~CUDADeviceContext() {
eigen_device_.reset();
PADDLE_ENFORCE(cudaStreamDestroy(stream_));
#if !defined(_WIN32)
PADDLE_ENFORCE(dynload::ncclCommDestroy(nccl_comm_));
if (nccl_comm_) {
PADDLE_ENFORCE(dynload::ncclCommDestroy(nccl_comm_));
}
#endif
}

@ -223,5 +223,5 @@ if(WITH_DISTRIBUTE)
endif()
set_tests_properties(test_recordio_reader test_parallel_executor_test_while_train test_parallel_executor_mnist
test_parallel_executor_seresnext test_parallel_executor_crf
test_parallel_executor_seresnext test_parallel_executor_crf test_sync_batch_norm_op
PROPERTIES LABELS "RUN_TYPE=DIST")

@ -98,6 +98,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
#####################################################################
# Multi-GPUs, self.N / core.get_cuda_device_count() per GPU
assert core.get_cuda_device_count() > 1
main, startup, outs = self.build_program(place, layout, seed, True,
only_forward)
exe = fluid.Executor(place)

Loading…
Cancel
Save