optimize fast graph executor (#28962)

musl/disable_test_yolov3_temporarily
WangXi 5 years ago committed by GitHub
parent 562ded1041
commit 173c22aec2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -231,6 +231,23 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
OpHandleBase *op_to_run = op_queue.back(); OpHandleBase *op_to_run = op_queue.back();
op_queue.pop_back(); op_queue.pop_back();
// An op that transfers data across multiple devices may block other
// computations from being emitted. For example:
// step 1: queue=[Share, Allreduce], where Share has high priority
// step 2: Share executes, pending_op=Grad, queue=[Allreduce, Grad]
// step 3: Allreduce runs with sync. Although Allreduce and Grad have no
// topological dependency, Grad must wait for Allreduce to complete
// before it can be scheduled.
// In this scenario, computation and communication may not overlap.
// Therefore, emit the ops left in the queue before running a multi-device op.
if (op_to_run->IsMultiDeviceTransfer()) {
while (!op_queue.empty()) {
OpHandleBase *post_op = op_queue.back();
op_queue.pop_back();
RunOpAsync(op_deps, post_op, complete_q);
}
}
if (!RunOp(op_to_run, complete_q, &complete)) { if (!RunOp(op_to_run, complete_q, &complete)) {
return; return;
} }
@ -246,6 +263,9 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
// first without switching to another thread. // first without switching to another thread.
if (pending_op->GetPriority() == OpHandleBase::Priority::kHighest) { if (pending_op->GetPriority() == OpHandleBase::Priority::kHighest) {
op_queue.push_back(pending_op); op_queue.push_back(pending_op);
} else if (pending_op->IsMultiDeviceTransfer()) {
// multi device ops should be scheduled prior to computing ops
op_queue.push_front(pending_op);
} else { } else {
if (op_to_run == nullptr) { if (op_to_run == nullptr) {
op_to_run = pending_op; op_to_run = pending_op;

Loading…
Cancel
Save