!1892 [AutoParallel] limit partition dimension to adapt to new HCCL's constraint

Merge pull request !1892 from Chong/ReID
pull/1892/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit f6b5b2732f

@@ -37,7 +37,10 @@ void GenerateStrategy(std::shared_ptr<Graph> graph, const std::vector<std::share
MS_EXCEPTION_IF_NULL(index_list);
GeneratePartitionedOperatorStrategy(graph, ops, index_list);
std::shared_ptr<std::vector<size_t>> no_stra_op_list(new std::vector<size_t>);
GenerateEliminatedOperatorStrategyForward(graph, ops, eli_list, input_tensor_names, index_list, no_stra_op_list);
for (size_t i = 0; i < eli_list->size(); i++) {
no_stra_op_list->push_back(eli_list->at(i)[0]);
}
GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list);
GenerateEliminatedOperatorStrategyBackward(ops, input_tensor_names, no_stra_op_list);
GenerateRemainingOperatorStrategy(graph, ops, input_tensor_names, index_list, no_stra_op_list);
}
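Note on the hunk above: the eliminated operators are now seeded into a shared worklist before the forward, backward and remaining passes run. A minimal standalone sketch of the seeding, assuming eli_list records start with the index of the eliminated operator as in the hunk (the index values are illustrative only):

#include <cstddef>
#include <vector>

int main() {
  // Each eli_list record begins with the index of the operator whose
  // strategy was eliminated; only that head index goes into the worklist.
  std::vector<std::vector<size_t>> eli_list = {{5, 2}, {7, 3}, {9, 4}};  // illustrative indices
  std::vector<size_t> no_stra_op_list;
  for (size_t i = 0; i < eli_list.size(); i++) {
    no_stra_op_list.push_back(eli_list[i][0]);
  }
  // no_stra_op_list == {5, 7, 9}; the forward, backward and remaining
  // passes then try to assign each of these operators a strategy.
  return 0;
}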
@@ -49,6 +52,58 @@ std::vector<std::vector<int32_t>> PrepareMatMul(const std::shared_ptr<Graph> &gr
auto attrs = ops[iter_ops]->attrs();
bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value();
bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value();
// HCCL does not support multi-dimension partition, and the hardware does not support an excessive
// number of EVENTs, so we temporarily disable MatMul's multi-dimension partition function.
auto max_cut = 1.0 / g_device_manager->DeviceNum();
if (graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h != max_cut &&
graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w != max_cut) {
graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = 1.0;
graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_w = 1.0;
graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_h = 1.0;
graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = 1.0;
graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0;
graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0;
auto shape_1 = ops[iter_ops]->inputs_tensor_info()[0].shape()[0];
if (transpose_a) {
shape_1 = ops[iter_ops]->inputs_tensor_info()[0].shape()[1];
}
auto shape_4 = ops[iter_ops]->inputs_tensor_info()[1].shape()[1];
if (transpose_b) {
shape_4 = ops[iter_ops]->inputs_tensor_info()[1].shape()[0];
}
bool already_cut = false;
if (shape_1 >= shape_4) {
if (shape_1 % g_device_manager->DeviceNum() == 0) {
graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = max_cut;
graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = max_cut;
already_cut = true;
}
if (!already_cut && shape_4 % g_device_manager->DeviceNum() == 0) {
graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = max_cut;
graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = max_cut;
already_cut = true;
}
} else {
if (shape_4 % g_device_manager->DeviceNum() == 0) {
graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = max_cut;
graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = max_cut;
already_cut = true;
}
if (!already_cut && shape_1 % g_device_manager->DeviceNum() == 0) {
graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = max_cut;
graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = max_cut;
already_cut = true;
}
}
if (!already_cut) {
MS_LOG(EXCEPTION) << "Failure: MatMul's shape is invalid.";
}
}
for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
std::vector<int32_t> s;
if (transpose_a && (iter_op_inputs == 0)) {
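The hunk above enforces the new single-dimension constraint: when neither MatMul input already carries the full 1/device_num cut, every stride is reset to 1.0 and exactly one dimension is re-cut, preferring the larger of the two candidate dimensions and requiring divisibility by the device count. A standalone sketch of that selection rule (ChooseMatMulCutDim and the sample sizes are illustrative, not part of the patch):

#include <cstddef>
#include <stdexcept>

// Returns 0 to cut the row dimension of input A (str_h of argument 0),
// 1 to cut the column dimension of input B (str_w of argument 1).
// shape_1 and shape_4 are the sizes already adjusted for transpose_a /
// transpose_b, matching the names used in the hunk.
int ChooseMatMulCutDim(size_t shape_1, size_t shape_4, size_t device_num) {
  if (shape_1 >= shape_4) {
    if (shape_1 % device_num == 0) return 0;
    if (shape_4 % device_num == 0) return 1;
  } else {
    if (shape_4 % device_num == 0) return 1;
    if (shape_1 % device_num == 0) return 0;
  }
  throw std::runtime_error("MatMul's shape is invalid.");  // mirrors the MS_LOG(EXCEPTION) branch
}

// Example: with shape_1 = 4096, shape_4 = 1000 and 8 devices, 4096 >= 1000
// and 4096 % 8 == 0, so only input A's row dimension gets str_h = 1/8;
// every other stride stays at 1.0 (no partition).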
@@ -401,6 +456,11 @@ std::vector<int32_t> ModifyStrategyIfReduceIncoming(const std::vector<std::share
return s_Reduce;
}
std::vector<int32_t> ModifyStrategyIfSoftmaxIncoming(std::vector<int32_t> s) {
s.pop_back();
return s;
}
std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
const size_t iter_ops, const size_t incoming_op_index) {
std::vector<int32_t> s;
@@ -414,6 +474,9 @@ std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::sh
ops[incoming_op_index]->type() == REDUCE_MIN || ops[incoming_op_index]->type() == REDUCE_MEAN) {
s = ModifyStrategyIfReduceIncoming(ops, incoming_op_index, s);
}
if (ops[incoming_op_index]->type() == SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) {
s = ModifyStrategyIfSoftmaxIncoming(s);
}
}
return s;
}
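The new ModifyStrategyIfSoftmaxIncoming branch handles an incoming SoftmaxCrossEntropyWithLogits operator: its loss output has one dimension fewer than the logits input, so the copied strategy must drop its last element. A minimal runnable sketch (the TrimSoftmaxStrategy name and the example values are illustrative):

#include <cstdint>
#include <iostream>
#include <vector>

// Same trimming rule as ModifyStrategyIfSoftmaxIncoming in the hunk above:
// remove the partition of the last (class) dimension before the strategy is
// reused by the downstream operator.
std::vector<int32_t> TrimSoftmaxStrategy(std::vector<int32_t> s) {
  s.pop_back();
  return s;
}

int main() {
  std::vector<int32_t> incoming = {8, 1};  // illustrative (batch, class) split
  for (int32_t d : TrimSoftmaxStrategy(incoming)) std::cout << d << ' ';  // prints: 8
  std::cout << std::endl;
  return 0;
}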
@@ -466,12 +529,16 @@ std::vector<std::vector<int32_t>> GenerateStrategiesFromStrategy(const std::vect
void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> graph,
const std::vector<std::shared_ptr<OperatorInfo>> &ops,
const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list,
const std::vector<std::vector<std::string>> &input_tensor_names,
const std::shared_ptr<std::vector<size_t>> index_list,
const std::shared_ptr<std::vector<size_t>> no_stra_op_list) {
for (size_t eli_index = eli_list->size(); eli_index > 0; eli_index--) {
size_t iter_ops = eli_list->at(eli_index - 1)[0];
if (no_stra_op_list->size() == 0) {
return;
}
std::vector<size_t> no_stra_op_list_bis;
for (size_t iter_list = no_stra_op_list->size(); iter_list > 0; iter_list--) {
size_t iter_ops = no_stra_op_list->at(iter_list - 1);
std::vector<std::vector<int32_t>> stra;
std::vector<int32_t> s;
size_t incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops);
@@ -485,7 +552,7 @@ void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> grap
}
if (s.size() == 0) {
no_stra_op_list->push_back(iter_ops);
no_stra_op_list_bis.push_back(iter_ops);
} else {
stra = GenerateStrategiesFromStrategy(ops, iter_ops, s);
}
@@ -493,6 +560,11 @@ void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> grap
StrategyPtr sp = std::make_shared<Strategy>(0, stra);
ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost());
}
no_stra_op_list->clear();
for (size_t i = 0; i < no_stra_op_list_bis.size(); i++) {
no_stra_op_list->push_back(no_stra_op_list_bis[i]);
}
}
std::vector<int32_t> ModifyStrategyIfSqueezeOutgoing(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
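The reworked forward pass above sweeps the no-strategy worklist from back to front instead of walking eli_list directly; operators that still cannot inherit a strategy are parked in no_stra_op_list_bis, which then replaces the worklist for the next pass. A standalone sketch of that worklist pattern (derive_strategy stands in for the CopyIncomingOperator*Strategy / GenerateStrategiesFromStrategy logic in the patch):

#include <cstddef>
#include <functional>
#include <vector>

// One forward sweep: every operator index in the worklist is tried once;
// unresolved indices are collected and become the worklist handed to the
// backward and remaining passes.
void ForwardSweep(std::vector<size_t> *no_stra_op_list,
                  const std::function<bool(size_t)> &derive_strategy) {
  if (no_stra_op_list->empty()) return;
  std::vector<size_t> no_stra_op_list_bis;
  for (size_t iter_list = no_stra_op_list->size(); iter_list > 0; iter_list--) {
    size_t iter_ops = no_stra_op_list->at(iter_list - 1);
    if (!derive_strategy(iter_ops)) {
      no_stra_op_list_bis.push_back(iter_ops);  // still has no usable incoming strategy
    }
  }
  *no_stra_op_list = no_stra_op_list_bis;
}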
@@ -598,31 +670,27 @@ void GenerateRemainingOperatorStrategy(const std::shared_ptr<Graph> graph,
return;
}
for (size_t iter_list = no_stra_op_list->size(); iter_list > 0; iter_list--) {
auto iter_ops = no_stra_op_list->at(iter_list - 1);
size_t no_stra_op_list_size;
do {
no_stra_op_list_size = no_stra_op_list->size();
GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list);
GenerateEliminatedOperatorStrategyBackward(ops, input_tensor_names, no_stra_op_list);
} while (no_stra_op_list_size > no_stra_op_list->size());
for (size_t iter_list = 0; iter_list < no_stra_op_list->size(); iter_list++) {
auto iter_ops = no_stra_op_list->at(iter_list);
std::vector<std::vector<int32_t>> stra;
std::vector<int32_t> s;
size_t incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops);
if (incoming_op_index != SIZE_MAX) {
auto iter_graph = index_list->at(incoming_op_index);
if (iter_graph != SIZE_MAX) {
s = CopyIncomingOperatorOutputStrategy(graph, ops, iter_ops, iter_graph);
} else {
s = CopyIncomingOperatorInputStrategy(ops, iter_ops, incoming_op_index);
}
}
if (s.size() == 0) {
size_t max_dim_num = 0;
for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size() > max_dim_num) {
max_dim_num = ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size();
}
}
for (size_t i = 0; i < max_dim_num; i++) {
s.push_back(1);
size_t max_dim_num = 0;
for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size() > max_dim_num) {
max_dim_num = ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size();
}
}
for (size_t i = 0; i < max_dim_num; i++) {
s.push_back(1);
}
stra = GenerateStrategiesFromStrategy(ops, iter_ops, s);
StrategyPtr sp = std::make_shared<Strategy>(0, stra);
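GenerateRemainingOperatorStrategy now repeats the forward and backward sweeps until they stop shrinking the worklist, then falls back to a fully replicated (all-1) strategy sized to each leftover operator's largest input rank. A standalone sketch of that driver (sweep, max_input_rank and apply_strategy stand in for the real passes, tensor-info queries and strategy assignment):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

// Re-run the propagation passes while they still make progress, then give
// every operator that is still unresolved a default strategy of all 1s
// (no partition on any dimension).
void ResolveRemaining(std::vector<size_t> *no_stra_op_list,
                      const std::function<void(std::vector<size_t> *)> &sweep,
                      const std::function<size_t(size_t)> &max_input_rank,
                      const std::function<void(size_t, const std::vector<int32_t> &)> &apply_strategy) {
  size_t previous_size;
  do {
    previous_size = no_stra_op_list->size();
    sweep(no_stra_op_list);  // forward + backward pass over the worklist
  } while (previous_size > no_stra_op_list->size());
  for (size_t op : *no_stra_op_list) {
    std::vector<int32_t> s(max_input_rank(op), 1);  // default: keep every dimension whole
    apply_strategy(op, s);  // GenerateStrategiesFromStrategy + SetSelectedStrategyAndCost in the real code
  }
}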

@@ -64,13 +64,13 @@ std::vector<int32_t> ModifyStrategyIfSqueezeIncoming(const std::vector<std::shar
std::vector<int32_t> GetDimList(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops);
std::vector<int32_t> ModifyStrategyIfReduceIncoming(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
const size_t incoming_op_index, std::vector<int32_t> s);
std::vector<int32_t> ModifyStrategyIfSoftmaxIncoming(std::vector<int32_t> s);
std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
const size_t iter_ops, const size_t incoming_op_index);
std::vector<std::vector<int32_t>> GenerateStrategiesFromStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
const size_t iter_ops, std::vector<int32_t> s);
void GenerateEliminatedOperatorStrategyForward(std::shared_ptr<Graph> graph,
const std::vector<std::shared_ptr<OperatorInfo>> &ops,
const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list,
const std::vector<std::vector<std::string>> &input_tensor_names,
const std::shared_ptr<std::vector<size_t>> index_list,
const std::shared_ptr<std::vector<size_t>> no_stra_op_list);

@@ -1101,13 +1101,22 @@ std::vector<std::vector<std::string>> RecInputTensorNames(const std::map<std::st
}
Status ParallelStrategyRecSearch(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &root) {
if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) {
MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " << entire_costgraph->GetOperators().size()
<< " operators.";
if (CostModelContext::GetInstance()->is_multi_subgraphs()) {
if (ConstructCostGraphNodesByUniqueIdTC(all_nodes, root) == SUCCESS) {
MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are "
<< entire_costgraph->GetOperators().size() << " operators.";
} else {
MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed.";
}
} else {
MS_LOG(ERROR) << "Constructing nodes for cost graph failed.";
return FAILED;
if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) {
MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are "
<< entire_costgraph->GetOperators().size() << " operators.";
} else {
MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed.";
}
}
ReshapeCostCompute(all_nodes);
auto ops = entire_costgraph->GetOperators();
std::vector<std::vector<std::string>> input_tensor_names = entire_costgraph->get_inputs_tensor_name_list();
