|
|
|
|
@ -33,14 +33,15 @@
|
|
|
|
|
namespace mindspore {
|
|
|
|
|
namespace dataset {
|
|
|
|
|
DeviceQueueOp::DeviceQueueOp(std::string channel_name, DeviceType device_type, int32_t device_id, int32_t prefetch_size,
|
|
|
|
|
bool send_epoch_end)
|
|
|
|
|
bool send_epoch_end, int total_batch)
|
|
|
|
|
: PipelineOp(1),
|
|
|
|
|
channel_name_(channel_name),
|
|
|
|
|
device_type_(device_type),
|
|
|
|
|
device_id_(device_id),
|
|
|
|
|
prefetch_size_(prefetch_size),
|
|
|
|
|
send_epoch_end_(send_epoch_end),
|
|
|
|
|
stop_send_(false) {
|
|
|
|
|
stop_send_(false),
|
|
|
|
|
total_batch_(total_batch) {
|
|
|
|
|
#ifdef ENABLE_TDTQUE
|
|
|
|
|
ascend_keep_waiting_ = true;
|
|
|
|
|
#endif
|
|
|
|
|
@ -60,7 +61,8 @@ DeviceQueueOp::Builder::Builder(int32_t prefetch_size)
|
|
|
|
|
: builder_prefetch_size_(prefetch_size),
|
|
|
|
|
builder_device_id_(0),
|
|
|
|
|
builder_device_type_(DeviceType::CPU),
|
|
|
|
|
builder_channel_name_("") {}
|
|
|
|
|
builder_channel_name_(""),
|
|
|
|
|
builder_total_batch_(0) {}
|
|
|
|
|
|
|
|
|
|
Status DeviceQueueOp::EoeReceived(int32_t worker_id) {
|
|
|
|
|
state_ = OpState::kDeOpIdle;
|
|
|
|
|
@ -102,11 +104,13 @@ Status DeviceQueueOp::operator()() {
|
|
|
|
|
#ifdef ENABLE_TDTQUE
|
|
|
|
|
Status DeviceQueueOp::SendDataToAscend() {
|
|
|
|
|
MS_LOG(INFO) << "Device queue, sending data to Ascend.";
|
|
|
|
|
int64_t total_batch = 0;
|
|
|
|
|
int64_t send_batch = 0;
|
|
|
|
|
double batch_start_time, end_time;
|
|
|
|
|
int32_t batch_cost, tdt_cost;
|
|
|
|
|
int32_t connector_size = 0;
|
|
|
|
|
int32_t connector_capacity;
|
|
|
|
|
bool is_break_loop = false;
|
|
|
|
|
|
|
|
|
|
std::shared_ptr<DeviceQueueTracing> profiling_node;
|
|
|
|
|
bool isProfilingEnable = tree_->GetProfilingManager()->IsProfilingEnable();
|
|
|
|
|
if (isProfilingEnable) {
|
|
|
|
|
@ -119,8 +123,8 @@ Status DeviceQueueOp::SendDataToAscend() {
|
|
|
|
|
std::unique_ptr<DataBuffer> current_buffer;
|
|
|
|
|
RETURN_IF_NOT_OK(GetNextInput(¤t_buffer));
|
|
|
|
|
|
|
|
|
|
while (!current_buffer->eof()) {
|
|
|
|
|
while (!current_buffer->eoe()) {
|
|
|
|
|
while (!current_buffer->eof() && !is_break_loop) {
|
|
|
|
|
while (!current_buffer->eoe() && !is_break_loop) {
|
|
|
|
|
RETURN_IF_NOT_OK(CheckExceptions(current_buffer));
|
|
|
|
|
TensorRow currRow;
|
|
|
|
|
for (int row_id = 0; row_id < current_buffer->NumRows(); row_id++) {
|
|
|
|
|
@ -142,17 +146,21 @@ Status DeviceQueueOp::SendDataToAscend() {
|
|
|
|
|
if (isProfilingEnable) {
|
|
|
|
|
end_time = ProfilingTime::GetCurMilliSecond();
|
|
|
|
|
// record push tdt time
|
|
|
|
|
profiling_node->Record(TIME, TDT_PUSH_TIME, total_batch + 1, tdt_cost);
|
|
|
|
|
profiling_node->Record(TIME, TDT_PUSH_TIME, send_batch + 1, tdt_cost);
|
|
|
|
|
batch_cost = (int32_t)(end_time - batch_start_time);
|
|
|
|
|
// record batch time
|
|
|
|
|
profiling_node->Record(TIME, BATCH_TIME, total_batch + 1, batch_cost);
|
|
|
|
|
profiling_node->Record(TIME, BATCH_TIME, send_batch + 1, batch_cost);
|
|
|
|
|
// record pipeline time
|
|
|
|
|
profiling_node->Record(TIME, PIPELINE_TIME, total_batch + 1, batch_cost - tdt_cost);
|
|
|
|
|
profiling_node->Record(TIME, PIPELINE_TIME, send_batch + 1, batch_cost - tdt_cost);
|
|
|
|
|
batch_start_time = end_time;
|
|
|
|
|
// record connector depth
|
|
|
|
|
profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, total_batch + 1, connector_size);
|
|
|
|
|
profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, send_batch + 1, connector_size);
|
|
|
|
|
}
|
|
|
|
|
send_batch++;
|
|
|
|
|
if (total_batch_ > 0 && send_batch >= total_batch_) {
|
|
|
|
|
is_break_loop = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
total_batch++;
|
|
|
|
|
}
|
|
|
|
|
if (isProfilingEnable) {
|
|
|
|
|
connector_size = ChildOpConnectorSize();
|
|
|
|
|
@ -184,7 +192,7 @@ Status DeviceQueueOp::SendDataToAscend() {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tree_->SetFinished();
|
|
|
|
|
MS_LOG(INFO) << "Device queue total batch is " << total_batch;
|
|
|
|
|
MS_LOG(INFO) << "Device queue total batch is " << send_batch;
|
|
|
|
|
|
|
|
|
|
return Status::OK();
|
|
|
|
|
}
|
|
|
|
|
@ -193,7 +201,7 @@ Status DeviceQueueOp::SendDataToAscend() {
|
|
|
|
|
#ifdef ENABLE_GPUQUE
|
|
|
|
|
Status DeviceQueueOp::SendDataToGPU() {
|
|
|
|
|
MS_LOG(INFO) << "Device queue, sending data to GPU.";
|
|
|
|
|
int64_t total_batch = 0;
|
|
|
|
|
int64_t send_batch = 0;
|
|
|
|
|
bool is_break_loop = false;
|
|
|
|
|
bool is_open = false;
|
|
|
|
|
uint32_t handle = INVALID_HANDLE;
|
|
|
|
|
@ -235,19 +243,23 @@ Status DeviceQueueOp::SendDataToGPU() {
|
|
|
|
|
is_open = true;
|
|
|
|
|
}
|
|
|
|
|
RETURN_IF_NOT_OK(RetryPushGPUData(data_size, curr_row, handle, isProfilingEnable, &push_cost));
|
|
|
|
|
total_batch++;
|
|
|
|
|
send_batch++;
|
|
|
|
|
if (isProfilingEnable) {
|
|
|
|
|
end_time = ProfilingTime::GetCurMilliSecond();
|
|
|
|
|
// record push data time
|
|
|
|
|
profiling_node->Record(TIME, TDT_PUSH_TIME, total_batch, push_cost);
|
|
|
|
|
profiling_node->Record(TIME, TDT_PUSH_TIME, send_batch, push_cost);
|
|
|
|
|
batch_cost = (int32_t)(end_time - batch_start_time);
|
|
|
|
|
// record batch time
|
|
|
|
|
profiling_node->Record(TIME, BATCH_TIME, total_batch, batch_cost);
|
|
|
|
|
profiling_node->Record(TIME, BATCH_TIME, send_batch, batch_cost);
|
|
|
|
|
// record pipeline time
|
|
|
|
|
profiling_node->Record(TIME, PIPELINE_TIME, total_batch, batch_cost - push_cost);
|
|
|
|
|
profiling_node->Record(TIME, PIPELINE_TIME, send_batch, batch_cost - push_cost);
|
|
|
|
|
batch_start_time = end_time;
|
|
|
|
|
// record connector depth
|
|
|
|
|
profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, total_batch, connector_size);
|
|
|
|
|
profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, send_batch, connector_size);
|
|
|
|
|
}
|
|
|
|
|
if (total_batch_ > 0 && send_batch >= total_batch_) {
|
|
|
|
|
is_break_loop = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!TaskManager::FindMe()->Interrupted() && !GpuBufferMgr::GetInstance().IsClosed()) {
|
|
|
|
|
@ -272,7 +284,7 @@ Status DeviceQueueOp::SendDataToGPU() {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tree_->SetFinished();
|
|
|
|
|
MS_LOG(INFO) << "Device queue total batch is " << total_batch << ".";
|
|
|
|
|
MS_LOG(INFO) << "Device queue total batch is " << send_batch << ".";
|
|
|
|
|
|
|
|
|
|
GpuBufferMgr::GetInstance().Close(handle);
|
|
|
|
|
GpuBufferMgr::GetInstance().CloseConfirm();
|
|
|
|
|
|