|
|
|
@ -296,6 +296,11 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
|
|
|
|
|
member_->use_all_reduce_ =
|
|
|
|
|
build_strategy.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce;
|
|
|
|
|
member_->nranks_ = build_strategy.num_trainers_ * places.size();
|
|
|
|
|
#if defined(PADDLE_WITH_CUDA) && defined(_WIN32)
|
|
|
|
|
if (member_->use_cuda_) {
|
|
|
|
|
PADDLE_ENFORCE(places.size() == 1, "Windows can support Single GPU only.");
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
if (!member_->use_all_reduce_) {
|
|
|
|
|
PADDLE_ENFORCE(places.size() > 1,
|
|
|
|
|
"If you set build_strategy.reduce with 'Reduce',"
|
|
|
|
@ -361,8 +366,6 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
|
|
|
|
|
member_->nccl_ctxs_.DefaultFlatCtx()->at(member_->places_[dev_id]);
|
|
|
|
|
dev_ctx->set_nccl_comm(nccl_ctx.comm());
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW("Not compiled with CUDA");
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
// broadcast parameters from the 0th device to others:
|
|
|
|
@ -544,8 +547,6 @@ void ParallelExecutor::BCastParamsToDevices(
|
|
|
|
|
}
|
|
|
|
|
nccl_ctxs->WaitAll();
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
PADDLE_THROW("Not compiled with CUDA");
|
|
|
|
|
#endif
|
|
|
|
|
} else {
|
|
|
|
|
platform::CPUPlace cpu;
|
|
|
|
@ -650,7 +651,9 @@ ParallelExecutor::~ParallelExecutor() {
|
|
|
|
|
bool ParallelExecutor::EnableParallelGraphExecution(
|
|
|
|
|
const ir::Graph &graph, const ExecutionStrategy &exec_strategy,
|
|
|
|
|
const BuildStrategy &build_strategy) const {
|
|
|
|
|
if (!FLAGS_enable_parallel_graph) return false;
|
|
|
|
|
if (!FLAGS_enable_parallel_graph) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool enable_parallel_graph = true;
|
|
|
|
|
|
|
|
|
@ -670,11 +673,19 @@ bool ParallelExecutor::EnableParallelGraphExecution(
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!member_->use_all_reduce_ || !member_->use_cuda_)
|
|
|
|
|
|
|
|
|
|
if (!member_->use_all_reduce_ || !member_->use_cuda_) {
|
|
|
|
|
if (build_strategy.enable_sequential_execution_ ||
|
|
|
|
|
exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental)
|
|
|
|
|
exec_strategy.type_ == ExecutionStrategy::ExecutorType::kExperimental) {
|
|
|
|
|
enable_parallel_graph = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef WIN32
|
|
|
|
|
VLOG(1) << "Windows has no support to parallel graph, enable_parallel_graph "
|
|
|
|
|
"would be forced to false.";
|
|
|
|
|
enable_parallel_graph = false;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
return enable_parallel_graph;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|