!727 [AutoParallel] complete cost for recursive programming

Merge pull request !727 from Chong/cost
pull/727/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 69ab46e624

@@ -446,51 +446,8 @@ StrategyRec CostPooling::ChoseStr(const std::vector<double> &cost_op, StrategyRe
return str;
}
// Get optimal strategy for Add
StrategyRec CostAdd::GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph) {
int tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n);
int tensor_c = static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c);
int tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h);
int tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w);
std::vector<double> cost_op;
std::vector<std::vector<float>> mode;
if (tensor_n < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph));
}
if (tensor_c < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph));
}
if (tensor_h < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph));
}
if (tensor_w < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph));
}
return ChoseStr(cost_op, node.apply.str);
}
// Chose strategy for Add
StrategyRec CostAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
StrategyRec CostTensorAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
if (cost_op[min_position] > (DOUBLE_MAX - 0.1)) {
return str;
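Every ChoseStr variant in this file follows the same pattern: each entry of cost_op is the estimated cost of halving one tensor dimension (N, C, H, W), DOUBLE_MAX marks a dimension that cannot be cut any further, and the cheapest entry decides the cut. A minimal standalone sketch of that selection logic, with std::numeric_limits used as a stand-in for the DOUBLE_MAX constant:

#include <algorithm>
#include <iostream>
#include <limits>
#include <vector>

int main() {
  const double kDoubleMax = std::numeric_limits<double>::max();  // stand-in for DOUBLE_MAX
  // Candidate cost of halving each of N, C, H, W; C and W can no longer be cut.
  std::vector<double> cost_op = {4.0, kDoubleMax, 2.5, kDoubleMax};

  // Same selection as the ChoseStr routines: index of the cheapest feasible cut.
  auto min_position = std::min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();

  if (cost_op[min_position] > (kDoubleMax - 0.1)) {
    std::cout << "every dimension is exhausted, keep the incoming strategy\n";
  } else {
    // The real code halves str_n/str_c/str_h/str_w depending on min_position.
    std::cout << "cut dimension " << min_position << " at cost " << cost_op[min_position] << "\n";
  }
  return 0;
}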
@@ -540,49 +497,6 @@ StrategyRec CostReshape::GetOptimalStr(const Graph::NodeType &node) const { retu
StrategyRec CostReshape::ChoseStr(StrategyRec str) const { return str; }
// Get optimal strategy for Biasadd
StrategyRec CostBiasAdd::GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph) {
int tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n);
int tensor_c = static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c);
int tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h);
int tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w);
std::vector<double> cost_op;
std::vector<std::vector<float>> mode;
if (tensor_n < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph));
}
if (tensor_c < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph));
}
if (tensor_h < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph));
}
if (tensor_w < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph));
}
return ChoseStr(cost_op, node.apply.str);
}
// Chose strategy for BiasAdd
StrategyRec CostBiasAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
@@ -629,7 +543,7 @@ StrategyRec CostBiasAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRe
return str;
}
// Get optimal strategy for Common OPs: ReLU and Softmax
// Get optimal strategy for Common OPs
StrategyRec CostCommon::GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph) {

@@ -157,21 +157,6 @@ class CostPooling {
double cost_in_ = 0;
}; // class CostPooling is used to compute the cost of Pooling operator.
// class CostAdd is used to compute the cost of Add operator.
class CostAdd {
public:
StrategyRec GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph);
double GetMinCostIn() const { return cost_in_; }
private:
StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
double cost_in_ = 0;
}; // class CostAdd is used to compute the cost of Add operator.
// class CostReshape is used to compute the cost of Reshape operator.
class CostReshape {
public:
@@ -185,35 +170,41 @@ class CostReshape {
double cost_in_ = 0;
}; // class CostReshape is used to compute the cost of Reshape operator.
// class CostBiasAdd is used to compute the cost of BiasAdd operator.
class CostBiasAdd {
// class CostCommon is used to compute the cost of an element-wise operator
class CostCommon {
public:
StrategyRec GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph);
virtual StrategyRec GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph);
double GetMinCostIn() const { return cost_in_; }
virtual double GetMinCostIn() const { return cost_in_; }
private:
StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
protected:
virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
double cost_in_ = 0;
}; // class CostBiasAdd is used to compute the cost of BiasAdd operator.
// class CostCommon is used to compute the cost of the element independent operator.
class CostCommon {
public:
StrategyRec GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph);
double GetMinCostIn() const { return cost_in_; }
}; // class CostCommon is used to compute the cost of an element-wise operator
private:
// class CostBiasAdd is used to compute the cost of the addition between a tensor and a bias
class CostBiasAdd : public CostCommon {
StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
double cost_in_ = 0;
}; // class CostCommon is used to compute the cost of Softmax & || Activation operator.
};
// class CostAdd is used to compute the cost of Add operator.
class CostTensorAdd : public CostCommon {
StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
};
// all the following operators are element-wise and have the same cost
class CostOneHot : public CostCommon {};
class CostReLU : public CostCommon {};
class CostLog : public CostCommon {};
class CostExp : public CostCommon {};
class CostAdd : public CostCommon {};
class CostSub : public CostCommon {};
class CostMul : public CostCommon {};
class CostDiv : public CostCommon {};
class CostSqueeze : public CostCommon {};
class CostCast : public CostCommon {};
// class BatchNorm is used to compute the cost of BatchNorm operator.
class CostBatchNorm {
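The header change above is the core of the refactor: CostCommon becomes a base class with virtual GetOptimalStr/ChoseStr, the purely element-wise operators (OneHot, ReLU, Log, Exp, Add, Sub, Mul, Div, Squeeze, Cast) inherit it unchanged, and CostTensorAdd/CostBiasAdd override only the strategy-selection step. A minimal sketch of that inheritance pattern, with a stand-in StrategyRec and member visibility simplified so the example is self-contained:

#include <iostream>
#include <memory>
#include <vector>

// Stand-in for the real StrategyRec; only the shape of the API matters here.
struct StrategyRec { int cut_dimension = -1; };

class CostCommon {
 public:
  virtual ~CostCommon() = default;
  virtual double GetMinCostIn() const { return cost_in_; }
  // Default selection shared by every element-wise operator.
  virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
    std::cout << "common ChoseStr over " << cost_op.size() << " candidates\n";
    return str;
  }

 protected:
  double cost_in_ = 0;
};

// Operators with identical element-wise cost simply inherit everything.
class CostReLU : public CostCommon {};

// TensorAdd keeps the common cost estimate but overrides the selection step.
class CostTensorAdd : public CostCommon {
 public:
  StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) override {
    std::cout << "TensorAdd-specific ChoseStr\n";
    return str;
  }
};

int main() {
  std::vector<std::unique_ptr<CostCommon>> ops;
  ops.push_back(std::make_unique<CostReLU>());
  ops.push_back(std::make_unique<CostTensorAdd>());
  for (const auto &op : ops) {
    op->ChoseStr({1.0, 2.0}, StrategyRec{});  // virtual dispatch picks the right override
  }
  return 0;
}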

@@ -38,6 +38,12 @@ void GenerateStrategy(std::shared_ptr<Graph> graph, bool mask_special_ops,
for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
stra.push_back(PrepareStrategy(graph, ops, iter_ops, iter_op_inputs));
}
// OneHot's scalar parameters were removed by entire_costgraph, so we complete them here.
if (ops[iter_ops]->type() == ONEHOT) {
std::vector<int32_t> s_Onehot = {};
stra.push_back(s_Onehot);
stra.push_back(s_Onehot);
}
StrategyPtr sp = std::make_shared<Strategy>(0, stra);
ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost());
}
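The OneHot branch pads the generated strategy so its length matches the operator's full input list: the cost graph only kept the tensor input, so two empty per-input strategies are appended for the scalar inputs (presumably the on/off values). A small sketch of what the padded strategy ends up containing, using plain vectors in place of the Strategy class and made-up partition numbers:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Strategy generated from the cost graph: only the index tensor survived,
  // here partitioned over 8 devices along its first dimension (illustrative numbers).
  std::vector<std::vector<int32_t>> stra = {{8, 1}};

  // Pad with empty strategies for the two scalar inputs,
  // mirroring the OneHot branch in GenerateStrategy.
  std::vector<int32_t> s_onehot = {};
  stra.push_back(s_onehot);
  stra.push_back(s_onehot);

  std::cout << "OneHot strategy has " << stra.size() << " input entries\n";  // prints 3
  return 0;
}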
@@ -201,12 +207,13 @@ std::vector<int32_t> PrepareStrategy(const std::shared_ptr<Graph> &graph,
}
}
// used to respect the strategy checks of auto parallel
void MaskSpecialOps(std::shared_ptr<Graph> graph) {
size_t iter_nodes = graph->nodes.size();
for (size_t i = 0; i < iter_nodes; i++) {
Graph::NodeType &node = graph->nodes[i];
if (node.apply.op_type == 1) { // For Convolution
if (node.apply.op_type == kRecConvolution) { // For convolution
// cover input tensor strategy
node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
node.apply.arguments[0].tensor_str.str_c = 1;
@@ -217,19 +224,12 @@ void MaskSpecialOps(std::shared_ptr<Graph> graph) {
node.apply.arguments[1].tensor_str.str_c = 1;
node.apply.arguments[1].tensor_str.str_h = 1;
node.apply.arguments[1].tensor_str.str_w = 1;
} else if (node.apply.op_type == 8) { // For BN
node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
node.apply.arguments[0].tensor_str.str_c = 1;
} else if (node.apply.op_type == kRecBiasAdd || node.apply.op_type == kRecMatMul) {
// For MatMul and BiasAdd
node.apply.arguments[0].tensor_str.str_h = 1;
node.apply.arguments[0].tensor_str.str_w = 1;
// cover 1-d argument blobs
node.apply.arguments[1].tensor_str.str_n = 1;
node.apply.arguments[2].tensor_str.str_c = 1;
node.apply.arguments[3].tensor_str.str_h = 1;
node.apply.arguments[4].tensor_str.str_w = 1;
} else if (node.apply.op_type == 4 || node.apply.op_type == 9) { // For SparseSoftmaxCrossEntropyWithLogits
node.tensor_parm.tensor_str.str_h = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
node.tensor_parm.tensor_str.str_w = 1;
node.apply.arguments[1].tensor_str.str_h = 1;
node.apply.arguments[1].tensor_str.str_w = 1;
}
}
}
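In this representation each str_* field is the fraction of a dimension that remains on one device: str_n = 1.0 / DeviceNum() splits the batch across every device, while 1 leaves the dimension whole, and the cost classes earlier in the diff recover the per-device extent as shape * str. A small worked sketch of that arithmetic with made-up shapes and an assumed device count of 8:

#include <iostream>

int main() {
  const float device_num = 8.0f;  // assumed device count

  // Batch dimension split across all devices, channel dimension left whole,
  // as MaskSpecialOps does for a convolution's input tensor.
  float str_n = 1.0f / device_num;
  float str_c = 1.0f;

  int shape_n = 32, shape_c = 64;

  // Same computation as the cost classes: per-device extent = shape * str.
  int tensor_n = static_cast<int>(shape_n * str_n);  // 4 samples per device
  int tensor_c = static_cast<int>(shape_c * str_c);  // 64 channels per device

  std::cout << "per-device N=" << tensor_n << " C=" << tensor_c << "\n";
  return 0;
}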

@@ -27,17 +27,26 @@
namespace mindspore {
namespace parallel {
enum OperatorType {
kRecUnkownType,
kRecMatMul,
kRecConvolution,
kRecPooling,
kRecAdd,
kRecSoftmax,
kRecReshape,
kRecBiasAdd,
kRecTensorAdd,
kRecReLU,
kRecBatchNorm,
kRecReshape,
kRecBiasAdd,
kRecSoftmax,
kRecSparseSoftmaxCrossEntropyWithLogits,
kRecUnkownType
kRecOneHot,
kRecLog,
kRecExp,
kRecAdd,
kRecSub,
kRecMul,
kRecDiv,
kRecSqueeze,
kRecCast
};
enum InfoType { kApplication, kConstant };
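Reordering the enum is also why the dispatch code elsewhere in this patch drops the hard-coded integers: kRecMatMul used to be the first enumerator (value 0, as the old op_type == 0 check in PartitionNode shows), and moving kRecUnkownType to the front shifts every value. A tiny illustration of the hazard, using a cut-down copy of the enum:

#include <iostream>

// Before this patch kRecMatMul was the first enumerator (value 0); after
// kRecUnkownType moved to the front, every value shifted by one.
enum OperatorType { kRecUnkownType, kRecMatMul, kRecConvolution };

int main() {
  OperatorType op = kRecMatMul;

  // Old style: comparing against a hard-coded integer silently breaks
  // whenever the enum is reordered or extended.
  std::cout << "magic-number check: " << (op == static_cast<OperatorType>(0)) << "\n";  // prints 0, now wrong

  // Style this patch adopts: compare against the named enumerator.
  std::cout << "named check: " << (op == OperatorType::kRecMatMul) << "\n";  // prints 1
  return 0;
}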

@@ -31,15 +31,23 @@ namespace parallel {
const std::map<std::string, OperatorType> DictOpType{
{MATMUL, OperatorType::kRecMatMul},
{CONV2D, OperatorType::kRecConvolution},
{MAXPOOL, OperatorType::kRecPooling},
{MAXPOOLV2, OperatorType::kRecPooling},
{SIMPLE_MEAN, OperatorType::kRecPooling},
{TENSOR_ADD, OperatorType::kRecAdd},
{TENSOR_ADD, OperatorType::kRecTensorAdd},
{RESHAPE, OperatorType::kRecReshape},
{BIAS_ADD, OperatorType::kRecBiasAdd},
{RELU, OperatorType::kRecReLU},
{BATCH_NORM, OperatorType::kRecBatchNorm},
{SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits},
};
{ONEHOT, OperatorType::kRecOneHot},
{LOG, OperatorType::kRecLog},
{EXP, OperatorType::kRecExp},
{SUB, OperatorType::kRecSub},
{MUL, OperatorType::kRecMul},
{DIV, OperatorType::kRecDiv},
{SQUEEZE, OperatorType::kRecSqueeze},
{CAST, OperatorType::kRecCast}};
const TensorParam MakeTensor(int n, int c, int h, int w);
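DictOpType is the name-to-type table the recursive strategy generator uses to classify operators; anything not listed presumably falls back to kRecUnkownType. A hypothetical lookup sketch (the consuming parser code is not part of this diff, and the string keys here stand in for the MindSpore op-name constants):

#include <iostream>
#include <map>
#include <string>

enum OperatorType { kRecUnkownType, kRecMatMul, kRecTensorAdd };

int main() {
  // Cut-down stand-in for DictOpType.
  const std::map<std::string, OperatorType> dict_op_type = {
      {"MatMul", kRecMatMul},
      {"TensorAdd", kRecTensorAdd},
  };

  std::string op_name = "Gelu";  // an operator the table does not know about
  auto it = dict_op_type.find(op_name);
  OperatorType type = (it == dict_op_type.end()) ? kRecUnkownType : it->second;

  std::cout << "mapped to enum value " << type << "\n";  // 0 -> kRecUnkownType
  return 0;
}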

@@ -48,14 +48,14 @@ double GetWeights(const Graph::NodeType &node) {
auto cost_ptr = std::make_shared<CostPooling>();
return cost_ptr->GetMinCostIn();
} else if (op.op_type == OperatorType::kRecAdd) {
// For Add
auto cost_ptr = std::make_shared<CostAdd>();
} else if (op.op_type == OperatorType::kRecTensorAdd) {
// For TensorAdd
auto cost_ptr = std::make_shared<CostTensorAdd>();
return cost_ptr->GetMinCostIn();
} else if (op.op_type == OperatorType::kRecSoftmax || op.op_type == OperatorType::kRecReLU ||
} else if (op.op_type == OperatorType::kRecReLU || op.op_type == OperatorType::kRecSoftmax ||
op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
// For Softmax & || Activation
// For Activation and Softmax
auto cost_ptr = std::make_shared<CostCommon>();
return cost_ptr->GetMinCostIn();
@@ -73,6 +73,15 @@ double GetWeights(const Graph::NodeType &node) {
// For BatchNorm
auto cost_ptr = std::make_shared<CostBatchNorm>();
return cost_ptr->GetMinCostIn();
} else if (op.op_type == OperatorType::kRecOneHot || op.op_type == OperatorType::kRecLog ||
op.op_type == OperatorType::kRecExp || op.op_type == OperatorType::kRecAdd ||
op.op_type == OperatorType::kRecSub || op.op_type == OperatorType::kRecMul ||
op.op_type == OperatorType::kRecDiv || op.op_type == OperatorType::kRecSqueeze ||
op.op_type == OperatorType::kRecCast) {
// For element-wise op
auto cost_ptr = std::make_shared<CostCommon>();
return cost_ptr->GetMinCostIn();
} else if (op.op_type == OperatorType::kRecUnkownType) {
// For unknown type
@@ -117,47 +126,57 @@ StrategyRec PartitionNode(const Graph::NodeType &node,
std::shared_ptr<Graph> graph) {
MS_EXCEPTION_IF_NULL(graph);
if (node.apply.op_type == 0) {
if (node.apply.op_type == OperatorType::kRecMatMul) {
// For MatMul
auto cost_ptr = std::make_shared<CostMatMul>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 1) {
} else if (node.apply.op_type == OperatorType::kRecConvolution) {
// For Convolution
auto cost_ptr = std::make_shared<CostConvolution>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 2) {
} else if (node.apply.op_type == OperatorType::kRecPooling) {
// For Pooling
auto cost_ptr = std::make_shared<CostPooling>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 3) {
// For Add
auto cost_ptr = std::make_shared<CostAdd>();
} else if (node.apply.op_type == OperatorType::kRecTensorAdd) {
// For TensorAdd
auto cost_ptr = std::make_shared<CostTensorAdd>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 4 || node.apply.op_type == 7 || node.apply.op_type == 9) {
} else if (node.apply.op_type == OperatorType::kRecReLU || node.apply.op_type == OperatorType::kRecSoftmax ||
node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
// For Softmax & Activation
auto cost_ptr = std::make_shared<CostCommon>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 5) {
} else if (node.apply.op_type == OperatorType::kRecReshape) {
// For Reshape
auto cost_ptr = std::make_shared<CostReshape>();
return cost_ptr->GetOptimalStr(node);
} else if (node.apply.op_type == 6) {
} else if (node.apply.op_type == OperatorType::kRecBiasAdd) {
// For BiasAdd
auto cost_ptr = std::make_shared<CostBiasAdd>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 8) {
} else if (node.apply.op_type == OperatorType::kRecBatchNorm) {
// For BatchNorm
auto cost_ptr = std::make_shared<CostBatchNorm>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 10) {
} else if (node.apply.op_type == OperatorType::kRecOneHot || node.apply.op_type == OperatorType::kRecLog ||
node.apply.op_type == OperatorType::kRecExp || node.apply.op_type == OperatorType::kRecAdd ||
node.apply.op_type == OperatorType::kRecSub || node.apply.op_type == OperatorType::kRecMul ||
node.apply.op_type == OperatorType::kRecDiv || node.apply.op_type == OperatorType::kRecSqueeze ||
node.apply.op_type == OperatorType::kRecCast) {
// For element-wise op
auto cost_ptr = std::make_shared<CostCommon>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == OperatorType::kRecUnkownType) {
// For unknown type
StrategyRec default_strategy;
return default_strategy;
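Both GetWeights and PartitionNode now repeat the same nine-way element-wise check. One optional way to keep that chain readable, shown only as an illustrative sketch and not as part of this patch, is a small predicate over the enum as it reads after the change:

#include <iostream>

enum OperatorType {
  kRecUnkownType, kRecMatMul, kRecConvolution, kRecPooling, kRecTensorAdd,
  kRecReLU, kRecBatchNorm, kRecReshape, kRecBiasAdd, kRecSoftmax,
  kRecSparseSoftmaxCrossEntropyWithLogits, kRecOneHot, kRecLog, kRecExp,
  kRecAdd, kRecSub, kRecMul, kRecDiv, kRecSqueeze, kRecCast
};

// Hypothetical helper: groups the element-wise types that share CostCommon,
// so the nine-way comparison is written once instead of twice.
bool IsElementWiseType(OperatorType t) {
  switch (t) {
    case kRecOneHot: case kRecLog: case kRecExp: case kRecAdd:
    case kRecSub: case kRecMul: case kRecDiv: case kRecSqueeze: case kRecCast:
      return true;
    default:
      return false;
  }
}

int main() {
  std::cout << IsElementWiseType(kRecMul) << " " << IsElementWiseType(kRecMatMul) << "\n";  // prints 1 0
  return 0;
}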
