fix 1gpu test=develop

revert-15207-remove_op_handle_lock_and_fix_var
Yancey1989 6 years ago
parent d3a4da5cf6
commit 06936a2ff5

@@ -51,7 +51,8 @@ void AllReduceOpHandle::RunImpl() {
   // FIXME(typhoonzero): If scope0(global scope) have NCCL_ID_VAR,
   // this is a distributed or inter-process call, find a better way.
 #ifdef PADDLE_WITH_CUDA
-  // Find NCCL ID from the global scope.
+  // All-reduce op_handle can run on the sub-scope, find the nccl id from
+  // the global scope.
   if (NoDummyInputSize() == 1 &&
       local_scopes_[0]->FindVar(NCCL_ID_VARNAME) == nullptr) {
 #else
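
The reworded comment documents why the lookup targets local_scopes_[0]: the all-reduce op_handle may execute against a sub-scope, while the NCCL ID variable lives in the global (root) scope, so name resolution has to fall back up the scope chain. Below is a minimal sketch of that parent-chain lookup; the Scope and Variable types here are hypothetical stand-ins, not Paddle's real framework classes.

#include <string>
#include <unordered_map>

struct Variable {};

// Hypothetical scope: owns its own variables and defers to its parent
// when a name is not found locally.
class Scope {
 public:
  explicit Scope(Scope* parent = nullptr) : parent_(parent) {}

  // Walks up the chain, so a sub-scope still sees variables (e.g. the
  // NCCL ID) that were created in the global scope.
  Variable* FindVar(const std::string& name) {
    auto it = vars_.find(name);
    if (it != vars_.end()) return &it->second;
    return parent_ ? parent_->FindVar(name) : nullptr;
  }

  Variable& NewVar(const std::string& name) { return vars_[name]; }

 private:
  Scope* parent_;
  std::unordered_map<std::string, Variable> vars_;
};

int main() {
  Scope global;
  global.NewVar("NCCL_ID_VARNAME");  // created once, in the root scope
  Scope sub(&global);                // per-execution sub-scope
  // The sub-scope resolves the name via its parent, which is the
  // fallback behavior the updated comment relies on.
  return sub.FindVar("NCCL_ID_VARNAME") != nullptr ? 0 : 1;
}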

@@ -59,7 +59,7 @@ FeedFetchList ParallelSSAGraphExecutor::Run(
     if (pool_) {
      run_futures.emplace_back(pool_->enqueue(std::move(call)));
     } else {
-      call();
+      fetch_datas.emplace_back(std::move(call()));
     }
   }
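
This hunk is the actual single-GPU fix: when no thread pool is configured (the one-device path), the old code invoked call() and discarded its FeedFetchList, so fetched results never reached fetch_datas. A condensed, hypothetical model of that dispatch loop is sketched below, with std::async standing in for the executor's thread pool and int standing in for FeedFetchList.

#include <functional>
#include <future>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::function<int()>> tasks = {
      [] { return 1; }, [] { return 2; }};
  const bool use_pool = false;  // single-device path

  std::vector<std::future<int>> futures;
  std::vector<int> results;
  for (auto& task : tasks) {
    if (use_pool) {
      // Multi-device path: run asynchronously, collect via futures.
      futures.emplace_back(std::async(std::launch::async, task));
    } else {
      // Buggy version was `task();` -- the result was silently dropped.
      results.emplace_back(task());  // fixed: keep the result
    }
  }
  for (auto& f : futures) results.emplace_back(f.get());
  std::cout << "collected " << results.size() << " results\n";
}

The fix simply makes the inline branch symmetric with the pooled branch: both paths now feed the same results container.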

@@ -231,7 +231,7 @@ ParallelExecutor::ParallelExecutor(
 #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   auto *nccl_id_var = scope->FindVar(NCCL_ID_VARNAME);
   ncclUniqueId *nccl_id = nullptr;
-  if (build_strategy.enable_parallel_graph_) {
+  if (build_strategy.enable_parallel_graph_ && places.size() > 1) {
     // parallel graph mode should initialize nccl by ncclCommInitRank since
     // it call nccl operator per device per thread.
     if (nccl_id_var == nullptr) {
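
The added places.size() > 1 condition keeps a single-GPU run out of the parallel-graph NCCL bootstrap: per the in-code comment, that path initializes NCCL with ncclCommInitRank once per device per thread, which only makes sense when there is more than one rank. A hedged sketch of the guard as a standalone predicate follows; BuildStrategy and the places list are simplified stand-ins for the real types.

#include <cstddef>
#include <vector>

struct BuildStrategy {
  bool enable_parallel_graph_ = true;
};

// One device means one rank, so the per-rank ncclCommInitRank bootstrap
// is unnecessary and execution falls through to the default
// single-communicator initialization.
bool NeedsPerRankNcclInit(const BuildStrategy& bs,
                          const std::vector<int>& places) {
  return bs.enable_parallel_graph_ && places.size() > 1;
}

int main() {
  BuildStrategy bs;
  std::vector<int> one_gpu = {0};
  std::vector<int> two_gpus = {0, 1};
  return (!NeedsPerRankNcclInit(bs, one_gpu) &&
          NeedsPerRankNcclInit(bs, two_gpus))
             ? 0
             : 1;
}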
