diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
index 7913c95573..8faa251e20 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
@@ -37,7 +37,10 @@ void GenerateStrategy(std::shared_ptr<Graph> graph, const std::vector<std::shared_ptr<OperatorInfo>> &ops,
   std::shared_ptr<std::vector<size_t>> no_stra_op_list(new std::vector<size_t>);
-  GenerateEliminatedOperatorStrategyForward(graph, ops, eli_list, input_tensor_names, index_list, no_stra_op_list);
+  for (size_t i = 0; i < eli_list->size(); i++) {
+    no_stra_op_list->push_back(eli_list->at(i)[0]);
+  }
+  GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list);
   GenerateEliminatedOperatorStrategyBackward(ops, input_tensor_names, no_stra_op_list);
   GenerateRemainingOperatorStrategy(graph, ops, input_tensor_names, index_list, no_stra_op_list);
 }
@@ -49,6 +52,58 @@ std::vector<std::vector<int32_t>> PrepareMatMul(const std::shared_ptr<Graph> &graph,
   auto attrs = ops[iter_ops]->attrs();
   bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value();
   bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value();
+
+  // HCCL does not support multi-dimension partition, and the hardware does not support excessive
+  // number of EVENT, so we temporarily disable matmul's multi-dimension partition function.
+  auto max_cut = 1.0 / g_device_manager->DeviceNum();
+  if (graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h != max_cut &&
+      graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w != max_cut) {
+    graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = 1.0;
+    graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_w = 1.0;
+    graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_h = 1.0;
+    graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = 1.0;
+    graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0;
+    graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0;
+
+    auto shape_1 = ops[iter_ops]->inputs_tensor_info()[0].shape()[0];
+    if (transpose_a) {
+      shape_1 = ops[iter_ops]->inputs_tensor_info()[0].shape()[1];
+    }
+    auto shape_4 = ops[iter_ops]->inputs_tensor_info()[1].shape()[1];
+    if (transpose_b) {
+      shape_4 = ops[iter_ops]->inputs_tensor_info()[1].shape()[0];
+    }
+
+    bool already_cut = false;
+    if (shape_1 >= shape_4) {
+      if (shape_1 % g_device_manager->DeviceNum() == 0) {
+        graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = max_cut;
+        graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = max_cut;
+        already_cut = true;
+      }
+      if (!already_cut && shape_4 % g_device_manager->DeviceNum() == 0) {
+        graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = max_cut;
+        graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = max_cut;
+        already_cut = true;
+      }
+    } else {
+      if (shape_4 % g_device_manager->DeviceNum() == 0) {
+        graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = max_cut;
+        graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = max_cut;
+        already_cut = true;
+      }
+      if (!already_cut && shape_1 % g_device_manager->DeviceNum() == 0) {
+        graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = max_cut;
+        graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = max_cut;
+        already_cut = true;
+      }
+    }
+
+    if (!already_cut) {
+      MS_LOG(EXCEPTION) << "Failure: MatMul's shape is invalid.";
+    }
+  }
+
   for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
     std::vector<int32_t> s;
     if (transpose_a && (iter_op_inputs == 0)) {
@@ -401,6 +456,11 @@ std::vector<int32_t> ModifyStrategyIfReduceIncoming(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+std::vector<int32_t> ModifyStrategyIfSoftmaxIncoming(std::vector<int32_t> s) {
+  s.pop_back();
+  return s;
+}
+
 std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                        const size_t iter_ops, const size_t incoming_op_index) {
   std::vector<int32_t> s;
@@ -414,6 +474,9 @@ std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
         ops[incoming_op_index]->type() == REDUCE_MIN || ops[incoming_op_index]->type() == REDUCE_MEAN) {
       s = ModifyStrategyIfReduceIncoming(ops, incoming_op_index, s);
     }
+    if (ops[incoming_op_index]->type() == SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) {
+      s = ModifyStrategyIfSoftmaxIncoming(s);
+    }
   }
   return s;
 }
@@ -466,12 +529,16 @@ std::vector<std::vector<int32_t>> GenerateStrategiesFromStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
 void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> graph,
                                                const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                               const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list,
                                                const std::vector<std::vector<std::string>> &input_tensor_names,
                                                const std::shared_ptr<std::vector<size_t>> index_list,
                                                const std::shared_ptr<std::vector<size_t>> no_stra_op_list) {
-  for (size_t eli_index = eli_list->size(); eli_index > 0; eli_index--) {
-    size_t iter_ops = eli_list->at(eli_index - 1)[0];
+  if (no_stra_op_list->size() == 0) {
+    return;
+  }
+  std::vector<size_t> no_stra_op_list_bis;
+
+  for (size_t iter_list = no_stra_op_list->size(); iter_list > 0; iter_list--) {
+    size_t iter_ops = no_stra_op_list->at(iter_list - 1);
     std::vector<std::vector<int32_t>> stra;
     std::vector<int32_t> s;
     size_t incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops);
@@ -485,7 +552,7 @@ void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> graph,
     }
     if (s.size() == 0) {
-      no_stra_op_list->push_back(iter_ops);
+      no_stra_op_list_bis.push_back(iter_ops);
     } else {
       stra = GenerateStrategiesFromStrategy(ops, iter_ops, s);
     }
@@ -493,6 +560,11 @@ void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> graph,
     StrategyPtr sp = std::make_shared<Strategy>(0, stra);
     ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost());
   }
+
+  no_stra_op_list->clear();
+  for (size_t i = 0; i < no_stra_op_list_bis.size(); i++) {
+    no_stra_op_list->push_back(no_stra_op_list_bis[i]);
+  }
 }
 
 std::vector<int32_t> ModifyStrategyIfSqueezeOutgoing(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
@@ -598,31 +670,27 @@ void GenerateRemainingOperatorStrategy(const std::shared_ptr<Graph> graph,
     return;
   }
 
-  for (size_t iter_list = no_stra_op_list->size(); iter_list > 0; iter_list--) {
-    auto iter_ops = no_stra_op_list->at(iter_list - 1);
+  size_t no_stra_op_list_size;
+  do {
+    no_stra_op_list_size = no_stra_op_list->size();
+    GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list);
+    GenerateEliminatedOperatorStrategyBackward(ops, input_tensor_names, no_stra_op_list);
+  } while (no_stra_op_list_size > no_stra_op_list->size());
+
+  for (size_t iter_list = 0; iter_list < no_stra_op_list->size(); iter_list++) {
+    auto iter_ops = no_stra_op_list->at(iter_list);
     std::vector<std::vector<int32_t>> stra;
     std::vector<int32_t> s;
-    size_t incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops);
-    if (incoming_op_index != SIZE_MAX) {
-      auto iter_graph = index_list->at(incoming_op_index);
-      if (iter_graph != SIZE_MAX) {
-        s = CopyIncomingOperatorOutputStrategy(graph, ops, iter_ops, iter_graph);
-      } else {
-        s = CopyIncomingOperatorInputStrategy(ops, iter_ops, incoming_op_index);
-      }
-    }
-    if (s.size() == 0) {
-      size_t max_dim_num = 0;
-      for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
-        if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size() > max_dim_num) {
-          max_dim_num = ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size();
-        }
-      }
-      for (size_t i = 0; i < max_dim_num; i++) {
-        s.push_back(1);
+    size_t max_dim_num = 0;
+    for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
+      if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size() > max_dim_num) {
+        max_dim_num = ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size();
       }
     }
+    for (size_t i = 0; i < max_dim_num; i++) {
+      s.push_back(1);
+    }
     stra = GenerateStrategiesFromStrategy(ops, iter_ops, s);
     StrategyPtr sp = std::make_shared<Strategy>(0, stra);
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h
index adde07da8d..acda2b8452 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.h
@@ -64,13 +64,13 @@ std::vector<int32_t> ModifyStrategyIfSqueezeIncoming(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
 std::vector<int32_t> GetDimList(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops);
 std::vector<int32_t> ModifyStrategyIfReduceIncoming(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                     const size_t incoming_op_index, std::vector<int32_t> s);
+std::vector<int32_t> ModifyStrategyIfSoftmaxIncoming(std::vector<int32_t> s);
 std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                        const size_t iter_ops, const size_t incoming_op_index);
 std::vector<std::vector<int32_t>> GenerateStrategiesFromStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                                  const size_t iter_ops, std::vector<int32_t> s);
 void GenerateEliminatedOperatorStrategyForward(std::shared_ptr<Graph> graph,
                                                const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                               const std::shared_ptr<std::vector<std::vector<size_t>>> eli_list,
                                                const std::vector<std::vector<std::string>> &input_tensor_names,
                                                const std::shared_ptr<std::vector<size_t>> index_list,
                                                const std::shared_ptr<std::vector<size_t>> no_stra_op_list);
diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc
index c1ce0d626e..5ff15f3601 100644
--- a/mindspore/ccsrc/parallel/step_auto_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc
@@ -1101,13 +1101,22 @@ std::vector<std::vector<std::string>> RecInputTensorNames(const std::map
 Status ParallelStrategyRecSearch(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &root) {
-  if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) {
-    MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " << entire_costgraph->GetOperators().size()
-                 << " operators.";
+  if (CostModelContext::GetInstance()->is_multi_subgraphs()) {
+    if (ConstructCostGraphNodesByUniqueIdTC(all_nodes, root) == SUCCESS) {
+      MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are "
+                   << entire_costgraph->GetOperators().size() << " operators.";
+    } else {
+      MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed.";
+    }
   } else {
-    MS_LOG(ERROR) << "Constructing nodes for cost graph failed.";
-    return FAILED;
+    if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) {
+      MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are "
+                   << entire_costgraph->GetOperators().size() << " operators.";
+    } else {
+      MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed.";
+    }
   }
+  ReshapeCostCompute(all_nodes);
   auto ops = entire_costgraph->GetOperators();
   std::vector<std::vector<std::string>> input_tensor_names = entire_costgraph->get_inputs_tensor_name_list();
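Note (illustrative, not part of the patch): the PrepareMatMul hunk above replaces MatMul's multi-dimension partition with a single cut, chosen by whether the larger free dimension divides evenly by the device count. The standalone sketch below restates that selection rule under assumed names (ChooseMatMulCut, CutChoice, and device_num are hypothetical); the real code additionally honours transpose_a/transpose_b when reading the shapes and writes the chosen cut into the rec graph's tensor_str fields.

// Minimal sketch of the cut-selection rule; not MindSpore code.
#include <cstdint>
#include <iostream>

enum class CutChoice { kCutRowsOfA, kCutColsOfB, kInvalid };

// Prefer cutting the larger of the two free dimensions (rows of A, columns of B);
// fall back to the other one. A dimension is only cut if it divides evenly by the
// device count, mirroring the shape_1 / shape_4 checks in the patch.
CutChoice ChooseMatMulCut(int64_t rows_a, int64_t cols_b, int64_t device_num) {
  if (rows_a >= cols_b) {
    if (rows_a % device_num == 0) return CutChoice::kCutRowsOfA;
    if (cols_b % device_num == 0) return CutChoice::kCutColsOfB;
  } else {
    if (cols_b % device_num == 0) return CutChoice::kCutColsOfB;
    if (rows_a % device_num == 0) return CutChoice::kCutRowsOfA;
  }
  return CutChoice::kInvalid;  // corresponds to MS_LOG(EXCEPTION) in the patch
}

int main() {
  // 1024x512 * 512x100 on 8 devices: rows_a is larger and divisible by 8 -> cut rows of A.
  std::cout << static_cast<int>(ChooseMatMulCut(1024, 100, 8)) << "\n";  // prints 0
  // 96x512 * 512x100 on 8 devices: cols_b is larger but 100 % 8 != 0 -> fall back to rows of A.
  std::cout << static_cast<int>(ChooseMatMulCut(96, 100, 8)) << "\n";    // prints 0
  return 0;
}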