!727 [AutoParallel] complete cost for recursive programming

Merge pull request !727 from Chong/cost
pull/727/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 69ab46e624

@@ -446,51 +446,8 @@ StrategyRec CostPooling::ChoseStr(const std::vector<double> &cost_op, StrategyRe
return str;
}
// Get optimal strategy for Add
StrategyRec CostAdd::GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph) {
int tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n);
int tensor_c = static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c);
int tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h);
int tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w);
std::vector<double> cost_op;
std::vector<std::vector<float>> mode;
if (tensor_n < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph));
}
if (tensor_c < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph));
}
if (tensor_h < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph));
}
if (tensor_w < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph));
}
return ChoseStr(cost_op, node.apply.str);
}
// Chose strategy for Add
StrategyRec CostAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
StrategyRec CostTensorAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
if (cost_op[min_position] > (DOUBLE_MAX - 0.1)) {
return str;
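Every ChoseStr variant in this file follows the same pattern: each entry of cost_op is the estimated cost of halving one tensor dimension (N, C, H, W), DOUBLE_MAX marks a dimension that cannot be cut any further, and the cheapest entry decides the cut. A minimal standalone sketch of that selection logic, with std::numeric_limits used as a stand-in for the DOUBLE_MAX constant:

#include <algorithm>
#include <iostream>
#include <limits>
#include <vector>

int main() {
  const double kDoubleMax = std::numeric_limits<double>::max();  // stand-in for DOUBLE_MAX
  // Candidate cost of halving each of N, C, H, W; C and W can no longer be cut.
  std::vector<double> cost_op = {4.0, kDoubleMax, 2.5, kDoubleMax};

  // Same selection as the ChoseStr routines: index of the cheapest feasible cut.
  auto min_position = std::min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();

  if (cost_op[min_position] > (kDoubleMax - 0.1)) {
    std::cout << "every dimension is exhausted, keep the incoming strategy\n";
  } else {
    // The real code halves str_n/str_c/str_h/str_w depending on min_position.
    std::cout << "cut dimension " << min_position << " at cost " << cost_op[min_position] << "\n";
  }
  return 0;
}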
@@ -540,49 +497,6 @@ StrategyRec CostReshape::GetOptimalStr(const Graph::NodeType &node) const { retu
StrategyRec CostReshape::ChoseStr(StrategyRec str) const { return str; }
// Get optimal strategy for Biasadd
StrategyRec CostBiasAdd::GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph) {
int tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n);
int tensor_c = static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c);
int tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h);
int tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w);
std::vector<double> cost_op;
std::vector<std::vector<float>> mode;
if (tensor_n < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph));
}
if (tensor_c < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph));
}
if (tensor_h < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph));
}
if (tensor_w < 2) {
cost_op.push_back(DOUBLE_MAX);
} else {
cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph));
}
return ChoseStr(cost_op, node.apply.str);
}
// Chose strategy for BiasAdd
StrategyRec CostBiasAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
@@ -629,7 +543,7 @@ StrategyRec CostBiasAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRe
return str;
}
// Get optimal strategy for Common OPs: ReLU and Softmax
// Get optimal strategy for Common OPs
StrategyRec CostCommon::GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph) {

@@ -157,21 +157,6 @@ class CostPooling {
double cost_in_ = 0;
}; // class CostPooling is used to compute the cost of Pooling operator.
// class CostAdd is used to compute the cost of Add operator.
class CostAdd {
public:
StrategyRec GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph);
double GetMinCostIn() const { return cost_in_; }
private:
StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
double cost_in_ = 0;
}; // class CostAdd is used to compute the cost of Add operator.
// class CostReshape is used to compute the cost of Reshape operator.
class CostReshape {
public:
@@ -185,35 +170,41 @@ class CostReshape {
double cost_in_ = 0;
}; // class CostReshape is used to compute the cost of Reshape operator.
// class CostBiasAdd is used to compute the cost of BiasAdd operator.
class CostBiasAdd {
// class CostCommon is used to compute the cost of an element-wise operator
class CostCommon {
public:
StrategyRec GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph);
virtual StrategyRec GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph);
double GetMinCostIn() const { return cost_in_; }
virtual double GetMinCostIn() const { return cost_in_; }
private:
StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
protected:
virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
double cost_in_ = 0;
}; // class CostBiasAdd is used to compute the cost of BiasAdd operator.
// class CostCommon is used to compute the cost of the element independent operator.
class CostCommon {
public:
StrategyRec GetOptimalStr(const Graph::NodeType &node,
const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
const Graph &graph);
double GetMinCostIn() const { return cost_in_; }
}; // class CostCommon is used to compute the cost of an element-wise operator
private:
// class CostBiasAdd is used to compute the cost of the addition between a tensor and a bias
class CostBiasAdd : public CostCommon {
StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
double cost_in_ = 0;
}; // class CostCommon is used to compute the cost of Softmax & || Activation operator.
};
// class CostAdd is used to compute the cost of Add operator.
class CostTensorAdd : public CostCommon {
StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
};
// all the following operators are element-wise and have the same cost
class CostOneHot : public CostCommon {};
class CostReLU : public CostCommon {};
class CostLog : public CostCommon {};
class CostExp : public CostCommon {};
class CostAdd : public CostCommon {};
class CostSub : public CostCommon {};
class CostMul : public CostCommon {};
class CostDiv : public CostCommon {};
class CostSqueeze : public CostCommon {};
class CostCast : public CostCommon {};
// class BatchNorm is used to compute the cost of BatchNorm operator.
class CostBatchNorm {
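The header change above is the core of the refactor: CostCommon becomes a base class with virtual GetOptimalStr/ChoseStr, the purely element-wise operators (OneHot, ReLU, Log, Exp, Add, Sub, Mul, Div, Squeeze, Cast) inherit it unchanged, and CostTensorAdd/CostBiasAdd override only the strategy-selection step. A minimal sketch of that inheritance pattern, with a stand-in StrategyRec and member visibility simplified so the example is self-contained:

#include <iostream>
#include <memory>
#include <vector>

// Stand-in for the real StrategyRec; only the shape of the API matters here.
struct StrategyRec { int cut_dimension = -1; };

class CostCommon {
 public:
  virtual ~CostCommon() = default;
  virtual double GetMinCostIn() const { return cost_in_; }
  // Default selection shared by every element-wise operator.
  virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
    std::cout << "common ChoseStr over " << cost_op.size() << " candidates\n";
    return str;
  }

 protected:
  double cost_in_ = 0;
};

// Operators with identical element-wise cost simply inherit everything.
class CostReLU : public CostCommon {};

// TensorAdd keeps the common cost estimate but overrides the selection step.
class CostTensorAdd : public CostCommon {
 public:
  StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) override {
    std::cout << "TensorAdd-specific ChoseStr\n";
    return str;
  }
};

int main() {
  std::vector<std::unique_ptr<CostCommon>> ops;
  ops.push_back(std::make_unique<CostReLU>());
  ops.push_back(std::make_unique<CostTensorAdd>());
  for (const auto &op : ops) {
    op->ChoseStr({1.0, 2.0}, StrategyRec{});  // virtual dispatch picks the right override
  }
  return 0;
}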

@@ -38,6 +38,12 @@ void GenerateStrategy(std::shared_ptr<Graph> graph, bool mask_special_ops,
for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
stra.push_back(PrepareStrategy(graph, ops, iter_ops, iter_op_inputs));
}
// OneHot's scalar parameters were removed by entire_costgraph, so we complete them here.
if (ops[iter_ops]->type() == ONEHOT) {
std::vector<int32_t> s_Onehot = {};
stra.push_back(s_Onehot);
stra.push_back(s_Onehot);
}
StrategyPtr sp = std::make_shared<Strategy>(0, stra);
ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost());
}
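The OneHot branch pads the generated strategy so its length matches the operator's full input list: the cost graph only kept the tensor input, so two empty per-input strategies are appended for the scalar inputs (presumably the on/off values). A small sketch of what the padded strategy ends up containing, using plain vectors in place of the Strategy class and made-up partition numbers:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Strategy generated from the cost graph: only the index tensor survived,
  // here partitioned over 8 devices along its first dimension (illustrative numbers).
  std::vector<std::vector<int32_t>> stra = {{8, 1}};

  // Pad with empty strategies for the two scalar inputs,
  // mirroring the OneHot branch in GenerateStrategy.
  std::vector<int32_t> s_onehot = {};
  stra.push_back(s_onehot);
  stra.push_back(s_onehot);

  std::cout << "OneHot strategy has " << stra.size() << " input entries\n";  // prints 3
  return 0;
}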
@@ -201,12 +207,13 @@ std::vector<int32_t> PrepareStrategy(const std::shared_ptr<Graph> &graph,
}
}
// used to respect the strategy checks of auto parallel
void MaskSpecialOps(std::shared_ptr<Graph> graph) {
size_t iter_nodes = graph->nodes.size();
for (size_t i = 0; i < iter_nodes; i++) {
Graph::NodeType &node = graph->nodes[i];
if (node.apply.op_type == 1) { // For Convolution
if (node.apply.op_type == kRecConvolution) { // For convolution
// cover input tensor strategy
node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
node.apply.arguments[0].tensor_str.str_c = 1;
@@ -217,19 +224,12 @@ void MaskSpecialOps(std::shared_ptr<Graph> graph) {
node.apply.arguments[1].tensor_str.str_c = 1;
node.apply.arguments[1].tensor_str.str_h = 1;
node.apply.arguments[1].tensor_str.str_w = 1;
} else if (node.apply.op_type == 8) { // For BN
node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
node.apply.arguments[0].tensor_str.str_c = 1;
} else if (node.apply.op_type == kRecBiasAdd || node.apply.op_type == kRecMatMul) {
// For MatMul and BiasAdd
node.apply.arguments[0].tensor_str.str_h = 1;
node.apply.arguments[0].tensor_str.str_w = 1;
// cover 1-d argument blobs
node.apply.arguments[1].tensor_str.str_n = 1;
node.apply.arguments[2].tensor_str.str_c = 1;
node.apply.arguments[3].tensor_str.str_h = 1;
node.apply.arguments[4].tensor_str.str_w = 1;
} else if (node.apply.op_type == 4 || node.apply.op_type == 9) { // For SparseSoftmaxCrossEntropyWithLogits
node.tensor_parm.tensor_str.str_h = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
node.tensor_parm.tensor_str.str_w = 1;
node.apply.arguments[1].tensor_str.str_h = 1;
node.apply.arguments[1].tensor_str.str_w = 1;
}
}
}
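In this representation each str_* field is the fraction of a dimension that remains on one device: str_n = 1.0 / DeviceNum() splits the batch across every device, while 1 leaves the dimension whole, and the cost classes earlier in the diff recover the per-device extent as shape * str. A small worked sketch of that arithmetic with made-up shapes and an assumed device count of 8:

#include <iostream>

int main() {
  const float device_num = 8.0f;  // assumed device count

  // Batch dimension split across all devices, channel dimension left whole,
  // as MaskSpecialOps does for a convolution's input tensor.
  float str_n = 1.0f / device_num;
  float str_c = 1.0f;

  int shape_n = 32, shape_c = 64;

  // Same computation as the cost classes: per-device extent = shape * str.
  int tensor_n = static_cast<int>(shape_n * str_n);  // 4 samples per device
  int tensor_c = static_cast<int>(shape_c * str_c);  // 64 channels per device

  std::cout << "per-device N=" << tensor_n << " C=" << tensor_c << "\n";
  return 0;
}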

@@ -27,17 +27,26 @@
namespace mindspore {
namespace parallel {
enum OperatorType {
kRecUnkownType,
kRecMatMul,
kRecConvolution,
kRecPooling,
kRecAdd,
kRecSoftmax,
kRecReshape,
kRecBiasAdd,
kRecTensorAdd,
kRecReLU,
kRecBatchNorm,
kRecReshape,
kRecBiasAdd,
kRecSoftmax,
kRecSparseSoftmaxCrossEntropyWithLogits,
kRecUnkownType
kRecOneHot,
kRecLog,
kRecExp,
kRecAdd,
kRecSub,
kRecMul,
kRecDiv,
kRecSqueeze,
kRecCast
};
enum InfoType { kApplication, kConstant };
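Reordering the enum is also why the dispatch code elsewhere in this patch drops the hard-coded integers: kRecMatMul used to be the first enumerator (value 0, as the old op_type == 0 check in PartitionNode shows), and moving kRecUnkownType to the front shifts every value. A tiny illustration of the hazard, using a cut-down copy of the enum:

#include <iostream>

// Before this patch kRecMatMul was the first enumerator (value 0); after
// kRecUnkownType moved to the front, every value shifted by one.
enum OperatorType { kRecUnkownType, kRecMatMul, kRecConvolution };

int main() {
  OperatorType op = kRecMatMul;

  // Old style: comparing against a hard-coded integer silently breaks
  // whenever the enum is reordered or extended.
  std::cout << "magic-number check: " << (op == static_cast<OperatorType>(0)) << "\n";  // prints 0, now wrong

  // Style this patch adopts: compare against the named enumerator.
  std::cout << "named check: " << (op == OperatorType::kRecMatMul) << "\n";  // prints 1
  return 0;
}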

@@ -31,15 +31,23 @@ namespace parallel {
const std::map<std::string, OperatorType> DictOpType{
{MATMUL, OperatorType::kRecMatMul},
{CONV2D, OperatorType::kRecConvolution},
{MAXPOOL, OperatorType::kRecPooling},
{MAXPOOLV2, OperatorType::kRecPooling},
{SIMPLE_MEAN, OperatorType::kRecPooling},
{TENSOR_ADD, OperatorType::kRecAdd},
{TENSOR_ADD, OperatorType::kRecTensorAdd},
{RESHAPE, OperatorType::kRecReshape},
{BIAS_ADD, OperatorType::kRecBiasAdd},
{RELU, OperatorType::kRecReLU},
{BATCH_NORM, OperatorType::kRecBatchNorm},
{SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits},
};
{ONEHOT, OperatorType::kRecOneHot},
{LOG, OperatorType::kRecLog},
{EXP, OperatorType::kRecExp},
{SUB, OperatorType::kRecSub},
{MUL, OperatorType::kRecMul},
{DIV, OperatorType::kRecDiv},
{SQUEEZE, OperatorType::kRecSqueeze},
{CAST, OperatorType::kRecCast}};
const TensorParam MakeTensor(int n, int c, int h, int w);
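DictOpType is the name-to-type table the recursive strategy generator uses to classify operators; anything not listed presumably falls back to kRecUnkownType. A hypothetical lookup sketch (the consuming parser code is not part of this diff, and the string keys here stand in for the MindSpore op-name constants):

#include <iostream>
#include <map>
#include <string>

enum OperatorType { kRecUnkownType, kRecMatMul, kRecTensorAdd };

int main() {
  // Cut-down stand-in for DictOpType.
  const std::map<std::string, OperatorType> dict_op_type = {
      {"MatMul", kRecMatMul},
      {"TensorAdd", kRecTensorAdd},
  };

  std::string op_name = "Gelu";  // an operator the table does not know about
  auto it = dict_op_type.find(op_name);
  OperatorType type = (it == dict_op_type.end()) ? kRecUnkownType : it->second;

  std::cout << "mapped to enum value " << type << "\n";  // 0 -> kRecUnkownType
  return 0;
}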

@@ -48,14 +48,14 @@ double GetWeights(const Graph::NodeType &node) {
auto cost_ptr = std::make_shared<CostPooling>();
return cost_ptr->GetMinCostIn();
} else if (op.op_type == OperatorType::kRecAdd) {
// For Add
auto cost_ptr = std::make_shared<CostAdd>();
} else if (op.op_type == OperatorType::kRecTensorAdd) {
// For TensorAdd
auto cost_ptr = std::make_shared<CostTensorAdd>();
return cost_ptr->GetMinCostIn();
} else if (op.op_type == OperatorType::kRecSoftmax || op.op_type == OperatorType::kRecReLU ||
} else if (op.op_type == OperatorType::kRecReLU || op.op_type == OperatorType::kRecSoftmax ||
op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
// For Softmax & || Activation
// For Activation and Softmax
auto cost_ptr = std::make_shared<CostCommon>();
return cost_ptr->GetMinCostIn();
@@ -73,6 +73,15 @@ double GetWeights(const Graph::NodeType &node) {
// For BatchNorm
auto cost_ptr = std::make_shared<CostBatchNorm>();
return cost_ptr->GetMinCostIn();
} else if (op.op_type == OperatorType::kRecOneHot || op.op_type == OperatorType::kRecLog ||
op.op_type == OperatorType::kRecExp || op.op_type == OperatorType::kRecAdd ||
op.op_type == OperatorType::kRecSub || op.op_type == OperatorType::kRecMul ||
op.op_type == OperatorType::kRecDiv || op.op_type == OperatorType::kRecSqueeze ||
op.op_type == OperatorType::kRecCast) {
// For element-wise op
auto cost_ptr = std::make_shared<CostCommon>();
return cost_ptr->GetMinCostIn();
} else if (op.op_type == OperatorType::kRecUnkownType) {
// For unknown type
@@ -117,47 +126,57 @@ StrategyRec PartitionNode(const Graph::NodeType &node,
std::shared_ptr<Graph> graph) {
MS_EXCEPTION_IF_NULL(graph);
if (node.apply.op_type == 0) {
if (node.apply.op_type == OperatorType::kRecMatMul) {
// For MatMul
auto cost_ptr = std::make_shared<CostMatMul>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 1) {
} else if (node.apply.op_type == OperatorType::kRecConvolution) {
// For Convolution
auto cost_ptr = std::make_shared<CostConvolution>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 2) {
} else if (node.apply.op_type == OperatorType::kRecPooling) {
// For Pooling
auto cost_ptr = std::make_shared<CostPooling>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 3) {
// For Add
auto cost_ptr = std::make_shared<CostAdd>();
} else if (node.apply.op_type == OperatorType::kRecTensorAdd) {
// For TensorAdd
auto cost_ptr = std::make_shared<CostTensorAdd>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 4 || node.apply.op_type == 7 || node.apply.op_type == 9) {
} else if (node.apply.op_type == OperatorType::kRecReLU || node.apply.op_type == OperatorType::kRecSoftmax ||
node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
// For Softmax & Activation
auto cost_ptr = std::make_shared<CostCommon>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 5) {
} else if (node.apply.op_type == OperatorType::kRecReshape) {
// For Reshape
auto cost_ptr = std::make_shared<CostReshape>();
return cost_ptr->GetOptimalStr(node);
} else if (node.apply.op_type == 6) {
} else if (node.apply.op_type == OperatorType::kRecBiasAdd) {
// For BiasAdd
auto cost_ptr = std::make_shared<CostBiasAdd>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 8) {
} else if (node.apply.op_type == OperatorType::kRecBatchNorm) {
// For BatchNorm
auto cost_ptr = std::make_shared<CostBatchNorm>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == 10) {
} else if (node.apply.op_type == OperatorType::kRecOneHot || node.apply.op_type == OperatorType::kRecLog ||
node.apply.op_type == OperatorType::kRecExp || node.apply.op_type == OperatorType::kRecAdd ||
node.apply.op_type == OperatorType::kRecSub || node.apply.op_type == OperatorType::kRecMul ||
node.apply.op_type == OperatorType::kRecDiv || node.apply.op_type == OperatorType::kRecSqueeze ||
node.apply.op_type == OperatorType::kRecCast) {
// For element-wise op
auto cost_ptr = std::make_shared<CostCommon>();
return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
} else if (node.apply.op_type == OperatorType::kRecUnkownType) {
// For unknown type
StrategyRec default_strategy;
return default_strategy;
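Both GetWeights and PartitionNode now repeat the same nine-way element-wise check. One optional way to keep that chain readable, shown only as an illustrative sketch and not as part of this patch, is a small predicate over the enum as it reads after the change:

#include <iostream>

enum OperatorType {
  kRecUnkownType, kRecMatMul, kRecConvolution, kRecPooling, kRecTensorAdd,
  kRecReLU, kRecBatchNorm, kRecReshape, kRecBiasAdd, kRecSoftmax,
  kRecSparseSoftmaxCrossEntropyWithLogits, kRecOneHot, kRecLog, kRecExp,
  kRecAdd, kRecSub, kRecMul, kRecDiv, kRecSqueeze, kRecCast
};

// Hypothetical helper: groups the element-wise types that share CostCommon,
// so the nine-way comparison is written once instead of twice.
bool IsElementWiseType(OperatorType t) {
  switch (t) {
    case kRecOneHot: case kRecLog: case kRecExp: case kRecAdd:
    case kRecSub: case kRecMul: case kRecDiv: case kRecSqueeze: case kRecCast:
      return true;
    default:
      return false;
  }
}

int main() {
  std::cout << IsElementWiseType(kRecMul) << " " << IsElementWiseType(kRecMatMul) << "\n";  // prints 1 0
  return 0;
}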
