|
|
@ -130,28 +130,30 @@ static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *ke
|
|
|
|
return tbe_ret && akg_ret;
|
|
|
|
return tbe_ret && akg_ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static std::vector<int> CalCleanZerosSize(const CNodePtr &pre_node) {
|
|
|
|
static std::vector<size_t> CalCleanZerosSize(const CNodePtr &pre_node) {
|
|
|
|
MS_EXCEPTION_IF_NULL(pre_node);
|
|
|
|
MS_EXCEPTION_IF_NULL(pre_node);
|
|
|
|
std::vector<int> clean_size_list;
|
|
|
|
auto kernel_mod = AnfAlgo::GetKernelMod(pre_node);
|
|
|
|
|
|
|
|
MS_EXCEPTION_IF_NULL(kernel_mod);
|
|
|
|
|
|
|
|
std::vector<size_t> clean_size_list;
|
|
|
|
// clean output
|
|
|
|
// clean output
|
|
|
|
if (AnfAlgo::HasNodeAttr(kAttrAutomicOutputIndexs, pre_node)) {
|
|
|
|
if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, pre_node)) {
|
|
|
|
auto clean_output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(pre_node, kAttrAutomicOutputIndexs);
|
|
|
|
auto output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(pre_node, kAttrAtomicOutputIndexs);
|
|
|
|
for (auto index : clean_output_indexs) {
|
|
|
|
auto output_men_size = kernel_mod->GetOutputSizeList();
|
|
|
|
TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(pre_node, index);
|
|
|
|
for (auto index : output_indexs) {
|
|
|
|
size_t type_size = GetTypeByte(TypeIdToType(output_type_id));
|
|
|
|
auto clean_item = (output_men_size.at(index) + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize;
|
|
|
|
std::vector<size_t> shape = AnfAlgo::GetOutputDeviceShape(pre_node, index);
|
|
|
|
clean_size_list.emplace_back(clean_item);
|
|
|
|
auto size = std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
|
|
|
|
|
|
|
|
clean_size_list.push_back((size + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// clean workspace
|
|
|
|
// clean workspace
|
|
|
|
auto workspaces_size = 0;
|
|
|
|
if (AnfAlgo::HasNodeAttr(kAttrAtomicWorkspaceIndexs, pre_node)) {
|
|
|
|
if (AnfAlgo::HasNodeAttr(kAttrAutomicWorkspaceSize, pre_node)) {
|
|
|
|
auto workspace_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(pre_node, kAttrAtomicWorkspaceIndexs);
|
|
|
|
workspaces_size = AnfAlgo::GetNodeAttr<int>(pre_node, kAttrAutomicWorkspaceSize);
|
|
|
|
auto workspace_men_sizes = kernel_mod->GetWorkspaceSizeList();
|
|
|
|
clean_size_list.push_back(workspaces_size);
|
|
|
|
for (const auto &index : workspace_indexs) {
|
|
|
|
|
|
|
|
auto clean_item = (workspace_men_sizes.at(index) + kMemAlignSize + 31) / kMemAlignSize * kMemAlignSize;
|
|
|
|
|
|
|
|
clean_size_list.emplace_back(clean_item);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
MS_LOG(INFO) << "clear output size:" << clean_size_list.size() << ", workspace size:" << workspaces_size
|
|
|
|
MS_LOG(INFO) << "clear output size:" << clean_size_list.size() << ",pre_node:" << pre_node->fullname_with_scope();
|
|
|
|
<< ",pre_node:" << pre_node->fullname_with_scope();
|
|
|
|
|
|
|
|
return clean_size_list;
|
|
|
|
return clean_size_list;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -175,12 +177,12 @@ static void AddTbeClearZeroNode(mindspore::session::KernelGraph *const kernel_gr
|
|
|
|
builder->SetKernelType(KernelType::TBE_KERNEL);
|
|
|
|
builder->SetKernelType(KernelType::TBE_KERNEL);
|
|
|
|
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clear_zero.get());
|
|
|
|
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), clear_zero.get());
|
|
|
|
auto clean_size = CalCleanZerosSize(pre_node);
|
|
|
|
auto clean_size = CalCleanZerosSize(pre_node);
|
|
|
|
AnfAlgo::SetNodeAttr(kAttrAutomicAddMemSize, MakeValue(clean_size), clear_zero);
|
|
|
|
AnfAlgo::SetNodeAttr(kAttrAtomicAddMemSize, MakeValue(clean_size), clear_zero);
|
|
|
|
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(pre_node.get()), clear_zero.get());
|
|
|
|
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(pre_node.get()), clear_zero.get());
|
|
|
|
new_nodes->push_back(clear_zero);
|
|
|
|
new_nodes->push_back(clear_zero);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool IsAtomicNode(const CNodePtr &kernel_node) {
|
|
|
|
static bool IsAtomicNode(const CNodePtr &kernel_node) {
|
|
|
|
MS_EXCEPTION_IF_NULL(kernel_node);
|
|
|
|
MS_EXCEPTION_IF_NULL(kernel_node);
|
|
|
|
auto kernel_mod = AnfAlgo::GetKernelMod(kernel_node);
|
|
|
|
auto kernel_mod = AnfAlgo::GetKernelMod(kernel_node);
|
|
|
|
MS_EXCEPTION_IF_NULL(kernel_mod);
|
|
|
|
MS_EXCEPTION_IF_NULL(kernel_mod);
|
|
|
@ -188,40 +190,44 @@ bool IsAtomicNode(const CNodePtr &kernel_node) {
|
|
|
|
if (parameters_indexs.empty()) {
|
|
|
|
if (parameters_indexs.empty()) {
|
|
|
|
return false;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
auto atomic_flag = false;
|
|
|
|
|
|
|
|
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
|
|
|
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
|
|
|
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
|
|
|
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
|
|
|
auto workspace_size_list = kernel_mod->GetWorkspaceSizeList();
|
|
|
|
|
|
|
|
size_t workspace_num = kernel_mod->GetWorkspaceSizeList().size();
|
|
|
|
size_t workspace_num = kernel_mod->GetWorkspaceSizeList().size();
|
|
|
|
if (input_num + workspace_num + output_num > parameters_indexs.size()) {
|
|
|
|
size_t param_num = parameters_indexs.size();
|
|
|
|
size_t lossNum = (input_num + workspace_num + output_num) - parameters_indexs.size();
|
|
|
|
size_t total_num = input_num + workspace_num + output_num;
|
|
|
|
for (size_t i = 0; i < lossNum; i++) {
|
|
|
|
MS_LOG(INFO) << "parameters size: " << param_num << ", input & workspace & output num: " << total_num;
|
|
|
|
parameters_indexs.push_back(0);
|
|
|
|
size_t pad_index = param_num;
|
|
|
|
}
|
|
|
|
for (; pad_index < total_num; ++pad_index) {
|
|
|
|
|
|
|
|
parameters_indexs.emplace_back(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
std::vector<size_t> clean_output_indexs;
|
|
|
|
// process input
|
|
|
|
// in parameters data sort as input->workspace->output
|
|
|
|
for (size_t j = 0; j < input_num; ++j) {
|
|
|
|
size_t index = 0;
|
|
|
|
if (parameters_indexs.at(j) == 1) {
|
|
|
|
while (index < output_num) {
|
|
|
|
MS_LOG(EXCEPTION) << "Atomic addr clean does't support clean input address, input index: " << j;
|
|
|
|
if (parameters_indexs[input_num + workspace_num + index] == 1) {
|
|
|
|
|
|
|
|
atomic_flag = true;
|
|
|
|
|
|
|
|
clean_output_indexs.push_back(index);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
index++;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (atomic_flag) {
|
|
|
|
// process output
|
|
|
|
AnfAlgo::SetNodeAttr(kAttrAutomicOutputIndexs, MakeValue(clean_output_indexs), kernel_node);
|
|
|
|
std::vector<size_t> output_indexs;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < output_num; ++i) {
|
|
|
|
|
|
|
|
auto param_output = parameters_indexs.at(input_num + workspace_num + i);
|
|
|
|
|
|
|
|
if (param_output == 1) {
|
|
|
|
|
|
|
|
output_indexs.emplace_back(i);
|
|
|
|
|
|
|
|
MS_LOG(INFO) << "Atomic clear output index: " << i;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (size_t i = 0; i < workspace_num; ++i) {
|
|
|
|
AnfAlgo::SetNodeAttr(kAttrAtomicOutputIndexs, MakeValue(output_indexs), kernel_node);
|
|
|
|
if (parameters_indexs[input_num + i] == 1) {
|
|
|
|
// process workspace
|
|
|
|
atomic_flag = true;
|
|
|
|
std::vector<size_t> workspace_indexs;
|
|
|
|
AnfAlgo::SetNodeAttr(kAttrAutomicWorkspaceSize,
|
|
|
|
for (size_t k = 0; k < workspace_num; ++k) {
|
|
|
|
MakeValue(std::accumulate(workspace_size_list.begin(), workspace_size_list.end(), 0)),
|
|
|
|
auto param_workspace = parameters_indexs.at(input_num + k);
|
|
|
|
kernel_node);
|
|
|
|
if (param_workspace == 1) {
|
|
|
|
break;
|
|
|
|
workspace_indexs.emplace_back(k);
|
|
|
|
|
|
|
|
MS_LOG(INFO) << "Atomic clear workspace index: " << k;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return atomic_flag;
|
|
|
|
AnfAlgo::SetNodeAttr(kAttrAtomicWorkspaceIndexs, MakeValue(workspace_indexs), kernel_node);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return !(workspace_indexs.empty() && output_indexs.empty());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
|
|
|
|
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
|
|
|
|