@@ -37,7 +37,10 @@ void GenerateStrategy(std::shared_ptr<Graph> graph, const std::vector<std::share
  MS_EXCEPTION_IF_NULL(index_list);
  GeneratePartitionedOperatorStrategy(graph, ops, index_list);
  std::shared_ptr<std::vector<size_t>> no_stra_op_list(new std::vector<size_t>);
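  // Seed the list of operators that still lack a strategy with every
  // eliminated operator recorded in eli_list.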
  for (size_t i = 0; i < eli_list->size(); i++) {
    no_stra_op_list->push_back(eli_list->at(i)[0]);
  }
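  // Propagate strategies to those operators: a forward sweep inherits from an
  // operator's incoming neighbour, a backward sweep works in the opposite
  // direction, and a final pass handles whatever still has no strategy.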
  GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list);
  GenerateEliminatedOperatorStrategyBackward(ops, input_tensor_names, no_stra_op_list);
  GenerateRemainingOperatorStrategy(graph, ops, input_tensor_names, index_list, no_stra_op_list);
}

@@ -49,6 +52,58 @@ std::vector<std::vector<int32_t>> PrepareMatMul(const std::shared_ptr<Graph> &gr
  auto attrs = ops[iter_ops]->attrs();
  bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value();
  bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value();

  // HCCL does not support multi-dimensional partition, and the hardware does not
  // support an excessive number of EVENTs, so we temporarily disable MatMul's
  // multi-dimensional partition function.
  auto max_cut = 1.0 / g_device_manager->DeviceNum();
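  // str_h/str_w store the fraction of a tensor dimension kept on each device:
  // 1.0 means the dimension is not cut, and max_cut means it is split across
  // every device (e.g. with 8 devices, max_cut == 0.125, an 8-way split).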
  if (graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h != max_cut &&
      graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w != max_cut) {
    graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = 1.0;
    graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_w = 1.0;
    graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_h = 1.0;
    graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = 1.0;
    graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0;
    graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0;

    auto shape_1 = ops[iter_ops]->inputs_tensor_info()[0].shape()[0];
    if (transpose_a) {
      shape_1 = ops[iter_ops]->inputs_tensor_info()[0].shape()[1];
    }
    auto shape_4 = ops[iter_ops]->inputs_tensor_info()[1].shape()[1];
    if (transpose_b) {
      shape_4 = ops[iter_ops]->inputs_tensor_info()[1].shape()[0];
    }
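
    // After accounting for transposition, shape_1 is the row count of input A
    // and shape_4 is the column count of input B, i.e. the two dimensions of
    // the MatMul output. The larger one is cut first, provided it is divisible
    // by the device count; e.g. for A(1024x512) * B(512x256) on 8 devices,
    // shape_1 = 1024 and 1024 % 8 == 0, so the row dimension of A is cut.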
    bool already_cut = false;
    if (shape_1 >= shape_4) {
      if (shape_1 % g_device_manager->DeviceNum() == 0) {
        graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = max_cut;
        graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = max_cut;
        already_cut = true;
      }
      if (!already_cut && shape_4 % g_device_manager->DeviceNum() == 0) {
        graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = max_cut;
        graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = max_cut;
        already_cut = true;
      }
    } else {
      if (shape_4 % g_device_manager->DeviceNum() == 0) {
        graph->nodes[iter_graph].apply.arguments[1].tensor_str.str_w = max_cut;
        graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = max_cut;
        already_cut = true;
      }
      if (!already_cut && shape_1 % g_device_manager->DeviceNum() == 0) {
        graph->nodes[iter_graph].apply.arguments[0].tensor_str.str_h = max_cut;
        graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = max_cut;
        already_cut = true;
      }
    }

    if (!already_cut) {
      MS_LOG(EXCEPTION) << "Failure: MatMul's shape is invalid.";
    }
  }
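
  // Build one strategy vector per input below, taking the transpose flags into
  // account when mapping the chosen cut onto each input's dimensions.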
  for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
    std::vector<int32_t> s;
    if (transpose_a && (iter_op_inputs == 0)) {

@@ -401,6 +456,11 @@ std::vector<int32_t> ModifyStrategyIfReduceIncoming(const std::vector<std::share
  return s_Reduce;
}
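
// Strategies inherited from SoftmaxCrossEntropyWithLogits carry one extra
// trailing element; drop it.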
std::vector<int32_t> ModifyStrategyIfSoftmaxIncoming(std::vector<int32_t> s) {
  s.pop_back();
  return s;
}

std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                                       const size_t iter_ops, const size_t incoming_op_index) {
  std::vector<int32_t> s;

@@ -414,6 +474,9 @@ std::vector<int32_t> CopyIncomingOperatorInputStrategy(const std::vector<std::sh
        ops[incoming_op_index]->type() == REDUCE_MIN || ops[incoming_op_index]->type() == REDUCE_MEAN) {
      s = ModifyStrategyIfReduceIncoming(ops, incoming_op_index, s);
    }
    if (ops[incoming_op_index]->type() == SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) {
      s = ModifyStrategyIfSoftmaxIncoming(s);
    }
  }
  return s;
}

@@ -466,12 +529,16 @@ std::vector<std::vector<int32_t>> GenerateStrategiesFromStrategy(const std::vect

void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> graph,
                                               const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                                               const std::vector<std::vector<std::string>> &input_tensor_names,
                                               const std::shared_ptr<std::vector<size_t>> index_list,
                                               const std::shared_ptr<std::vector<size_t>> no_stra_op_list) {
  if (no_stra_op_list->size() == 0) {
    return;
  }
  std::vector<size_t> no_stra_op_list_bis;
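
  // Operators that still cannot inherit a strategy during this sweep are parked
  // in no_stra_op_list_bis and copied back into no_stra_op_list at the end, so
  // a later pass can retry them.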
  for (size_t iter_list = no_stra_op_list->size(); iter_list > 0; iter_list--) {
    size_t iter_ops = no_stra_op_list->at(iter_list - 1);
    std::vector<std::vector<int32_t>> stra;
    std::vector<int32_t> s;
    size_t incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops);

@@ -485,7 +552,7 @@ void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> grap
    }

    if (s.size() == 0) {
      no_stra_op_list_bis.push_back(iter_ops);
    } else {
      stra = GenerateStrategiesFromStrategy(ops, iter_ops, s);
    }

@@ -493,6 +560,11 @@ void GenerateEliminatedOperatorStrategyForward(const std::shared_ptr<Graph> grap
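    // sp is created even when stra is still empty; such operators remain on the
    // retry list (via no_stra_op_list_bis) and are revisited in a later pass.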
    StrategyPtr sp = std::make_shared<Strategy>(0, stra);
    ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost());
  }

  no_stra_op_list->clear();
  for (size_t i = 0; i < no_stra_op_list_bis.size(); i++) {
    no_stra_op_list->push_back(no_stra_op_list_bis[i]);
  }
}

std::vector<int32_t> ModifyStrategyIfSqueezeOutgoing(const std::vector<std::shared_ptr<OperatorInfo>> &ops,

@@ -598,31 +670,27 @@ void GenerateRemainingOperatorStrategy(const std::shared_ptr<Graph> graph,
    return;
  }
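
  // Alternate the forward and backward sweeps until a fixed point: stop once a
  // full round no longer shrinks the list of operators without a strategy.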
  size_t no_stra_op_list_size;
  do {
    no_stra_op_list_size = no_stra_op_list->size();
    GenerateEliminatedOperatorStrategyForward(graph, ops, input_tensor_names, index_list, no_stra_op_list);
    GenerateEliminatedOperatorStrategyBackward(ops, input_tensor_names, no_stra_op_list);
  } while (no_stra_op_list_size > no_stra_op_list->size());

  for (size_t iter_list = 0; iter_list < no_stra_op_list->size(); iter_list++) {
    auto iter_ops = no_stra_op_list->at(iter_list);
    std::vector<std::vector<int32_t>> stra;
    std::vector<int32_t> s;
    size_t incoming_op_index = FindIndexOfOperatorIncoming(input_tensor_names, iter_ops);
    if (incoming_op_index != SIZE_MAX) {
      auto iter_graph = index_list->at(incoming_op_index);
      if (iter_graph != SIZE_MAX) {
        s = CopyIncomingOperatorOutputStrategy(graph, ops, iter_ops, iter_graph);
      } else {
        s = CopyIncomingOperatorInputStrategy(ops, iter_ops, incoming_op_index);
      }
    }
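
    // Fallback: if no strategy could be inherited, use a fully non-partitioned
    // strategy, i.e. a 1 for every dimension of the operator's largest input.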
    if (s.size() == 0) {
      size_t max_dim_num = 0;
      for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
        if (ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size() > max_dim_num) {
          max_dim_num = ops[iter_ops]->inputs_tensor_info()[iter_op_inputs].shape().size();
        }
      }
      for (size_t i = 0; i < max_dim_num; i++) {
        s.push_back(1);
      }
    }

    stra = GenerateStrategiesFromStrategy(ops, iter_ops, s);
    StrategyPtr sp = std::make_shared<Strategy>(0, stra);