|
|
@ -42,8 +42,7 @@ Status CacheBase::Reset() {
|
|
|
|
RETURN_IF_NOT_OK(sampler_->ResetSampler());
|
|
|
|
RETURN_IF_NOT_OK(sampler_->ResetSampler());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Wake up the workers to get them going again in a new epoch
|
|
|
|
// Wake up the workers to get them going again in a new epoch
|
|
|
|
MS_LOG(DEBUG) << Name() << " resetting.";
|
|
|
|
MS_LOG(DEBUG) << Name() << " performing a self-reset.";
|
|
|
|
epoch_sync_.Set();
|
|
|
|
|
|
|
|
return Status::OK();
|
|
|
|
return Status::OK();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
|
|
|
|
CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
|
|
|
@ -72,7 +71,6 @@ Status CacheBase::FetchSamplesToWorkers() {
|
|
|
|
// Instead of sending sampler id to WorkerEntry, we send them to the Prefetcher which will redirect them
|
|
|
|
// Instead of sending sampler id to WorkerEntry, we send them to the Prefetcher which will redirect them
|
|
|
|
// to the WorkerEntry.
|
|
|
|
// to the WorkerEntry.
|
|
|
|
do {
|
|
|
|
do {
|
|
|
|
epoch_sync_.Clear();
|
|
|
|
|
|
|
|
if (AllowCacheMiss() && wait_cnt > 0) {
|
|
|
|
if (AllowCacheMiss() && wait_cnt > 0) {
|
|
|
|
MS_LOG(WARNING) << "Epoch: " << wait_cnt << " Cache Miss : " << num_cache_miss_
|
|
|
|
MS_LOG(WARNING) << "Epoch: " << wait_cnt << " Cache Miss : " << num_cache_miss_
|
|
|
|
<< " Total number of rows : " << row_cnt_;
|
|
|
|
<< " Total number of rows : " << row_cnt_;
|
|
|
@ -112,11 +110,17 @@ Status CacheBase::FetchSamplesToWorkers() {
|
|
|
|
// If repeating and this is not the last repeat, wait for reset.
|
|
|
|
// If repeating and this is not the last repeat, wait for reset.
|
|
|
|
if (!IsLastIteration()) {
|
|
|
|
if (!IsLastIteration()) {
|
|
|
|
MS_LOG(DEBUG) << Name() << " Waiting for reset. Count " << wait_cnt << " Buffer sent " << buf_cnt;
|
|
|
|
MS_LOG(DEBUG) << Name() << " Waiting for reset. Count " << wait_cnt << " Buffer sent " << buf_cnt;
|
|
|
|
RETURN_IF_NOT_OK(epoch_sync_.Wait());
|
|
|
|
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
// We can break out from the loop.
|
|
|
|
// We can break out from the loop.
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (epoch_sync_flag_) {
|
|
|
|
|
|
|
|
// If epoch_sync_flag_ is set, then master thread sleeps until all the worker threads have finished their job for
|
|
|
|
|
|
|
|
// the current epoch.
|
|
|
|
|
|
|
|
RETURN_IF_NOT_OK(WaitForWorkers());
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// If not the last repeat, self-reset and go to loop again.
|
|
|
|
|
|
|
|
if (!IsLastIteration()) RETURN_IF_NOT_OK(Reset());
|
|
|
|
UpdateRepeatAndEpochCounter();
|
|
|
|
UpdateRepeatAndEpochCounter();
|
|
|
|
} while (true);
|
|
|
|
} while (true);
|
|
|
|
// Flow the eof before exit
|
|
|
|
// Flow the eof before exit
|
|
|
@ -142,7 +146,13 @@ Status CacheBase::FetchFromCache(int32_t worker_id) {
|
|
|
|
std::unique_ptr<IOBlock> blk;
|
|
|
|
std::unique_ptr<IOBlock> blk;
|
|
|
|
do {
|
|
|
|
do {
|
|
|
|
RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&blk));
|
|
|
|
RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&blk));
|
|
|
|
if (blk->eof()) {
|
|
|
|
if (blk->wait()) {
|
|
|
|
|
|
|
|
// Sync io_block is a signal that master thread wants us to pause and sync with other workers.
|
|
|
|
|
|
|
|
// The last guy who comes to this sync point should reset the counter and wake up the master thread.
|
|
|
|
|
|
|
|
if (++num_workers_paused_ == num_workers_) {
|
|
|
|
|
|
|
|
wait_for_workers_post_.Set();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (blk->eof()) {
|
|
|
|
RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)));
|
|
|
|
RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)));
|
|
|
|
} else if (blk->eoe()) {
|
|
|
|
} else if (blk->eoe()) {
|
|
|
|
if (AllowCacheMiss()) {
|
|
|
|
if (AllowCacheMiss()) {
|
|
|
@ -186,7 +196,7 @@ Status CacheBase::FetchFromCache(int32_t worker_id) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Status CacheBase::RegisterResources() {
|
|
|
|
Status CacheBase::RegisterResources() {
|
|
|
|
RETURN_IF_NOT_OK(epoch_sync_.Register(tree_->AllTasks()));
|
|
|
|
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
|
|
|
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
|
|
|
|
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
|
|
|
|
RETURN_IF_NOT_OK(prefetch_queues_.Register(tree_->AllTasks()));
|
|
|
|
RETURN_IF_NOT_OK(prefetch_queues_.Register(tree_->AllTasks()));
|
|
|
|
RETURN_IF_NOT_OK(sampler_queue_->Register(tree_->AllTasks()));
|
|
|
|
RETURN_IF_NOT_OK(sampler_queue_->Register(tree_->AllTasks()));
|
|
|
|