|
|
|
@ -30,8 +30,8 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
|
|
|
|
|
local_scopes_(local_scopes),
|
|
|
|
|
places_(places),
|
|
|
|
|
graph_(std::move(graph)),
|
|
|
|
|
pool_(strategy.num_threads_ +
|
|
|
|
|
1), // add one more thread for generate op_deps
|
|
|
|
|
pool_(strategy.num_threads_),
|
|
|
|
|
prepare_pool_(1), // add one more thread for generate op_deps
|
|
|
|
|
fetch_ctxs_(places) {
|
|
|
|
|
for (auto &op : ir::FilterByNodeWrapper<OpHandleBase>(*graph_)) {
|
|
|
|
|
int dep = static_cast<int>(op->NotReadyInputSize());
|
|
|
|
@ -160,7 +160,7 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
void FastThreadedSSAGraphExecutor::PrepareAtomicOpDeps() {
|
|
|
|
|
atomic_op_deps_ = pool_.enqueue([&] {
|
|
|
|
|
atomic_op_deps_ = prepare_pool_.enqueue([&] {
|
|
|
|
|
auto *op_deps = new std::unordered_map<OpHandleBase *, std::atomic<int>>;
|
|
|
|
|
for (auto &pair : op_deps_) {
|
|
|
|
|
(*op_deps)[pair.first] = pair.second;
|
|
|
|
|