Merge pull request #11423 from typhoonzero/fix_develop_bugs

Fix nccl dist train bug
wangkuiyi-patch-1
Wu Yi 7 years ago committed by GitHub
commit 688e18508f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -67,6 +67,10 @@ class GenNCCLIdOp : public framework::OperatorBase {
client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME);
}
client->Wait();
for (auto& ep : endpoint_list) {
client->AsyncSendBatchBarrier(ep);
}
client->Wait();
VLOG(3) << "sending completed...";
}

Loading…
Cancel
Save