!307 modify aicpu single op dump

From: @zhou_chao1993
Reviewed-by: 
Signed-off-by:
pull/307/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit c209ef2f94

@ -124,6 +124,7 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve
size_t io_addr_num = args_.size();
if (task->GetOpTaskType() == OP_TASK_AICPU) {
GELOGD("Update aicpu_TF task args");
task->SetIoAddrsForDump(args_);
auto *dst_io_addr = const_cast<uintptr_t *>(reinterpret_cast<const uintptr_t *>(task->GetIOAddr()));
GE_CHECK_NOTNULL(dst_io_addr);
auto rt_ret = rtMemcpyAsync(dst_io_addr,
@ -170,11 +171,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
if (ret != SUCCESS) {
return ret;
}
ret = task->OpenDump(args_, stream_);
if (ret != SUCCESS) {
GELOGE(ret, "Open dump failed");
return ret;
}
}
return ret;

@ -39,26 +39,27 @@ void FreeHbm(void *var) {
(void)rtFree(var);
}
}
}
} // namespace
Status OpTask::OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream) {
Status OpTask::OpenDump(rtStream_t stream) {
if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) {
GELOGI("Dump is open in single op,start to set dump info");
std::vector<uint64_t> input_addrs;
std::vector<uint64_t> output_adds;
auto input_size = op_desc_->GetInputsSize();
auto output_size = op_desc_->GetOutputsSize();
auto all_size = io_addr.size();
auto all_size = io_addrs_for_dump_.size();
if (input_size + output_size != all_size) {
GELOGE(FAILED, "io_addr size is not equal input and output size");
GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", all_size,
input_size + output_size);
return FAILED;
}
for (size_t i = 0; i < input_size; i++) {
uint64_t input_addr = static_cast<uint64_t>(io_addr[i]);
uint64_t input_addr = io_addrs_for_dump_[i];
input_addrs.emplace_back(input_addr);
}
for (size_t j = 0; j < output_size; j++) {
uint64_t output_addr = static_cast<uint64_t>(io_addr[input_size + j]);
uint64_t output_addr = io_addrs_for_dump_[input_size + j];
output_adds.emplace_back(output_addr);
}
dump_op_.SetDumpInfo(DumpManager::GetInstance().GetDumpProperties(), op_desc_, input_addrs, output_adds, stream);
@ -126,6 +127,17 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) {
}
GELOGI("[TASK_INFO] %s", this->stub_name_.c_str());
size_t input_size = op_desc_->GetInputsSize();
size_t output_size = op_desc_->GetOutputsSize();
uint64_t *io_addr = reinterpret_cast<uint64_t *>(args_.get());
std::vector<uint64_t> io_addrs(io_addr, io_addr + input_size + output_size);
SetIoAddrsForDump(io_addrs);
auto status = OpenDump(stream);
if (status != SUCCESS) {
GELOGE(status, "Open dump failed in the tbe single op %s", this->stub_name_.c_str());
return status;
}
return SUCCESS;
}
@ -387,6 +399,12 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) {
}
GELOGI("[TASK_INFO] is %s", this->task_info_.c_str());
auto status = OpenDump(stream);
if (status != SUCCESS) {
GELOGE(status, "Open dump failed in aicpu single op %s", this->op_type_.c_str());
return status;
}
GELOGD("Done launch kernel successfully. task = %s", this->op_type_.c_str());
return SUCCESS;
}
@ -680,6 +698,17 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) {
}
GELOGD("Invoke rtCpuKernelLaunch succeeded");
size_t input_size = op_desc_->GetInputsSize();
size_t output_size = op_desc_->GetOutputsSize();
uint64_t *io_addr = reinterpret_cast<uint64_t *>(io_addr_);
std::vector<uint64_t> io_addrs (io_addr, io_addr + input_size + output_size);
SetIoAddrsForDump(io_addrs);
auto status = OpenDump(stream);
if (status != SUCCESS) {
GELOGE(status, "Open dump failed in the aicpucc single op %s", this->kernel_name_.c_str());
return status;
}
return SUCCESS;
}

@ -59,7 +59,10 @@ class OpTask {
const vector<int64_t> &GetWorkspaceSizes() const;
void SetWorkspaceSizes(const vector<int64_t> &workspace_sizes);
// Returns a const reference to the op_desc_ member held by this task.
const OpDescPtr &GetOpdesc() const {return op_desc_;}
Status OpenDump(const std::vector<uintptr_t> &io_addr, rtStream_t stream);
Status OpenDump(rtStream_t stream);
// Stores a copy of the given address list in io_addrs_for_dump_.
// OpenDump(rtStream_t) reads this member: it expects the vector's size to equal
// the op's input count plus output count, and treats the leading entries as
// input addresses and the remaining entries as output addresses.
void SetIoAddrsForDump(const vector<uint64_t> &io_addrs_for_dump) {
io_addrs_for_dump_ = io_addrs_for_dump;
}
virtual Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &input_buffers,
std::vector<GeTensorDesc> &output_desc,
@ -74,6 +77,7 @@ class OpTask {
DumpProperties dump_properties_;
DumpOp dump_op_;
OpDescPtr op_desc_;
std::vector<uint64_t> io_addrs_for_dump_;
};
class TbeOpTask : public OpTask {
@ -180,9 +184,11 @@ class AiCpuTask : public AiCpuBaseTask {
friend class AiCpuTaskBuilder;
void *workspace_addr_ = nullptr;
std::string task_info_;
// device addr
void *args_ = nullptr;
size_t arg_size_ = 0;
std::string op_type_;
// device addr
void *io_addr_ = nullptr;
bool dynamic_flag_ = false;

Loading…
Cancel
Save