|
|
|
@ -228,115 +228,6 @@ class ConstantDuplicateMul : public AnfVisitor {
|
|
|
|
|
CNodePtr cnode_;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// grad = AllReduce(grad) / worker_number
|
|
|
|
|
// grad = grad + weight * decay
|
|
|
|
|
// ->
|
|
|
|
|
// grad = grad + weight * decay
|
|
|
|
|
// grad = AllReduce(grad) / worker_number
|
|
|
|
|
|
|
|
|
|
// {prim::kPrimAddN, {prim::kPrimMakeTuple, {prim::kPrimMul, {prim::kPrimAllReduce, X}, Y}, Z}} ->
|
|
|
|
|
// {prim::kPrimMul, {prim::kPrimAllReduce, {prim::kPrimAddN,{prim::kPrimMakeTuple, Z, X}}}, Y}
|
|
|
|
|
class AdjustAllReduceMulAdd : public AnfVisitor {
|
|
|
|
|
public:
|
|
|
|
|
AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override {
|
|
|
|
|
Reset();
|
|
|
|
|
// {prim::kPrimAddN, Zs}
|
|
|
|
|
if (!IsPrimitiveCNode(node, prim::kPrimAddN)) {
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
auto addn = node->cast<CNodePtr>();
|
|
|
|
|
if (addn->size() != 2) {
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
AnfVisitor::Match(prim::kPrimMakeTuple, {IsNode, IsNode})(addn->input(1));
|
|
|
|
|
if (x_ == nullptr || y_ == nullptr || z_ == nullptr || all_reduce_fg_ == nullptr) {
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
auto addn_maketuple = addn->input(1);
|
|
|
|
|
|
|
|
|
|
auto fg = all_reduce_fg_;
|
|
|
|
|
// addn inputs cross the graph, make the inputs same as allreduce node.
|
|
|
|
|
if (z_->isa<CNode>() && fg != z_->func_graph()) {
|
|
|
|
|
auto cnode_z = z_->cast<CNodePtr>();
|
|
|
|
|
z_ = NewCNode(cnode_z->inputs(), fg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto addn_op_node = addn->input(0);
|
|
|
|
|
auto make_tuple_op_node = addn->input(1)->cast<CNodePtr>()->input(0);
|
|
|
|
|
|
|
|
|
|
AnfNodePtr tuple = NewCNode({make_tuple_op_node, z_, x_}, fg);
|
|
|
|
|
AnfNodePtr add = NewCNode({addn_op_node, tuple}, fg);
|
|
|
|
|
AnfNodePtr all_reduce = NewCNode({all_reduce_, add}, fg);
|
|
|
|
|
AnfNodePtr mul = NewCNode({mul_, all_reduce, y_}, fg);
|
|
|
|
|
ProcessDependEdge(fg, addn_maketuple, all_reduce);
|
|
|
|
|
return mul;
|
|
|
|
|
}
|
|
|
|
|
void ProcessDependEdge(const FuncGraphPtr &fg, const AnfNodePtr &addn_maketuple, const AnfNodePtr &new_node) {
|
|
|
|
|
// If has dynamic loss scale.
|
|
|
|
|
auto &users_map = fg->manager()->node_users();
|
|
|
|
|
auto it = users_map.find(mul_cnode_);
|
|
|
|
|
if (it != users_map.end()) {
|
|
|
|
|
auto users = it->second;
|
|
|
|
|
for (auto &user_pair : users) {
|
|
|
|
|
auto node = user_pair.first;
|
|
|
|
|
if (node != addn_maketuple) {
|
|
|
|
|
if (IsPrimitiveCNode(node, prim::kPrimMakeTuple)) {
|
|
|
|
|
fg->manager()->SetEdge(node, user_pair.second, new_node);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
void Visit(const AnfNodePtr &node) override {
|
|
|
|
|
if (level_ == 0) {
|
|
|
|
|
level_ = 1;
|
|
|
|
|
is_reduce_match_ = false;
|
|
|
|
|
// {prim::kPrimMul, {prim::kPrimAllReduce, X}, Y}
|
|
|
|
|
AnfVisitor::Match(prim::kPrimMul)(node);
|
|
|
|
|
level_ = 0;
|
|
|
|
|
if (is_reduce_match_) {
|
|
|
|
|
mul_ = node->cast<CNodePtr>()->input(0);
|
|
|
|
|
mul_cnode_ = node->cast<CNodePtr>();
|
|
|
|
|
y_ = tmp_;
|
|
|
|
|
} else {
|
|
|
|
|
z_ = node;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (level_ == 1) {
|
|
|
|
|
// {prim::kPrimAllReduce, X}
|
|
|
|
|
if (IsPrimitiveCNode(node, prim::kPrimAllReduce)) {
|
|
|
|
|
auto cnode = node->cast<CNodePtr>();
|
|
|
|
|
if (cnode->size() > 1) {
|
|
|
|
|
all_reduce_ = cnode->input(0);
|
|
|
|
|
x_ = cnode->input(1);
|
|
|
|
|
is_reduce_match_ = true;
|
|
|
|
|
all_reduce_fg_ = cnode->func_graph();
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
tmp_ = node;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Reset() {
|
|
|
|
|
level_ = 0;
|
|
|
|
|
is_reduce_match_ = false;
|
|
|
|
|
x_ = nullptr;
|
|
|
|
|
y_ = nullptr;
|
|
|
|
|
z_ = nullptr;
|
|
|
|
|
tmp_ = nullptr;
|
|
|
|
|
all_reduce_fg_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
int level_{0};
|
|
|
|
|
bool is_reduce_match_{false};
|
|
|
|
|
AnfNodePtr x_{nullptr}, y_{nullptr}, z_{nullptr}, tmp_{nullptr};
|
|
|
|
|
AnfNodePtr all_reduce_{nullptr}, mul_{nullptr}, mul_cnode_{nullptr};
|
|
|
|
|
FuncGraphPtr all_reduce_fg_{nullptr};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
class ArithmeticSimplify {
|
|
|
|
|
public:
|
|
|
|
|
ArithmeticSimplify()
|
|
|
|
|