From ef3507e97378395f5d29748873b343f31b8e0b66 Mon Sep 17 00:00:00 2001
From: zengzitao <zengzitao@huawei.com>
Date: Mon, 8 Feb 2021 11:26:12 +0800
Subject: [PATCH] fix exec order bug about monad

---
 .../optimizer/graph_kernel/optimize_assign.cc |  28 ++---
 .../ops/graph_kernel/test_optimize_assign.py  | 102 ++++++++++++++++++
 2 files changed, 118 insertions(+), 12 deletions(-)
 create mode 100644 tests/st/ops/graph_kernel/test_optimize_assign.py

diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/optimize_assign.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/optimize_assign.cc
index 875bf6701c..f57a87f8d3 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/optimize_assign.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/optimize_assign.cc
@@ -88,11 +88,14 @@ std::map<size_t, AnfNodePtr> FindAssignAndOutputVal(const CNodePtr &fg_cnode) {
   return output_replace_map;
 }
 
-bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr &param_user) {
+bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr &param_user, const AnfNodePtr &getitem) {
   auto mng = AnfAlgo::GetCNodeFuncGraphPtr(gk_node)->manager();
   MS_EXCEPTION_IF_NULL(mng);
   bool result = false;
-  auto IncludeUser = [&result, &gk_node](const AnfNodePtr &node) {
+  auto IncludeUser = [&result, &gk_node, &getitem](const AnfNodePtr &node) {
+    if (node == getitem) {
+      return EXCLUDE;
+    }
     if (node == gk_node) {
       result = true;
       return EXCLUDE;
@@ -103,23 +106,23 @@ bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr &param_user)
   return result;
 }
 
-void KeepExecOrder(const FuncGraphPtr &func_graph, const AnfNodePtr &gk_node, const AnfNodePtr &par_user_node,
+void KeepExecOrder(const FuncGraphPtr &func_graph, const AnfNodePtr &getitem, const AnfNodePtr &assign_to_node,
                    const FuncGraphManagerPtr &mng) {
   // Insert update_state_node, need mount a monad node.
   auto u = NewValueNode(kUMonad);
   u->set_abstract(kUMonad->ToAbstract());
-  AnfNodePtrList update_state_inputs = {NewValueNode(prim::kPrimUpdateState), u, gk_node};
+  AnfNodePtrList update_state_inputs = {NewValueNode(prim::kPrimUpdateState), u, getitem};
   auto update_state_node = func_graph->NewCNode(update_state_inputs);
-  update_state_node->set_abstract(gk_node->abstract());
+  update_state_node->set_abstract(getitem->abstract());
   func_graph->AddNode(update_state_node);
 
   // Insert load_node
-  AnfNodePtrList load_inputs = {NewValueNode(prim::kPrimLoad), par_user_node, update_state_node};
+  AnfNodePtrList load_inputs = {NewValueNode(prim::kPrimLoad), assign_to_node, update_state_node};
   auto load_node = func_graph->NewCNode(load_inputs);
-  load_node->set_abstract(par_user_node->abstract());
+  load_node->set_abstract(assign_to_node->abstract());
   func_graph->AddNode(load_node);
 
-  mng->Replace(gk_node, par_user_node);
+  mng->Replace(getitem, load_node);
 }
 
 int64_t GetitemIndex(const AnfNodePtr &getitem) {
@@ -136,17 +139,18 @@ void UpdateUsersOfGraphKernel(const FuncGraphPtr &func_graph, const AnfNodePtr &
     auto getitem = getitem_iter.first;
     if (GetitemIndex(getitem) != removed_index) continue;
     auto getitem_users = mng->node_users()[getitem];  // get a copy of getitem's users before replacing
-    mng->Replace(getitem, assign_to);
 
     for (const auto &getitem_user_iter : getitem_users) {
       auto getitem_user = getitem_user_iter.first;
       // 1. A previous pass `DependFormater` has ensured that all data users are directly link to its
       //   input, without Depend node.
-      // 2. If the `cnode` has another path to the getitem_user, it's unnecessary to add a ControlDepend.
-      if (!AnfAlgo::IsRealKernel(getitem_user) || HasPathToParamUser(cnode, getitem_user)) {
+      // 2. If the `cnode` has another path to the getitem_user, it's unnecessary to add UpdateState and Load nodes to
+      //   keep the execution order.
+      if (!AnfAlgo::IsRealKernel(getitem_user) || HasPathToParamUser(cnode, getitem_user, getitem)) {
+        mng->Replace(getitem, assign_to);
         continue;
       }
-      KeepExecOrder(func_graph, cnode, getitem_user, mng);
+      KeepExecOrder(func_graph, getitem, assign_to, mng);
     }
     break;
   }
diff --git a/tests/st/ops/graph_kernel/test_optimize_assign.py b/tests/st/ops/graph_kernel/test_optimize_assign.py
new file mode 100644
index 0000000000..026acd2b34
--- /dev/null
+++ b/tests/st/ops/graph_kernel/test_optimize_assign.py
@@ -0,0 +1,102 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+import pytest
+import mindspore.context as context
+from mindspore import Tensor
+from mindspore.nn import Cell
+import mindspore.ops.operations as P
+from mindspore.ops import functional as F
+from mindspore.common.parameter import Parameter
+
+
+class TestOptAssignNet_1(Cell):  # Add -> Assign into a Parameter -> ReduceMax of the sum
+    def __init__(self):
+        super(TestOptAssignNet_1, self).__init__()
+        self.add = P.Add()
+        self.reduce_max = P.ReduceMax()
+        self.param = Parameter(  # assignment target, zero-initialised float32 of shape [2, 2, 2]
+            Tensor(np.zeros([2, 2, 2]).astype(np.float32)), name='param')
+
+    def construct(self, x, y):
+        add_res = self.add(x, y)
+        F.depend(add_res, F.assign(self.param, add_res))  # return value of depend is not bound
+        # The sum is consumed by ReduceMax in addition to the Assign above.
+        return self.reduce_max(add_res)
+
+
+class TestOptAssignNet_2(Cell):  # Add -> Assign into a Parameter; the sum itself is the output
+    def __init__(self):
+        super(TestOptAssignNet_2, self).__init__()
+        self.add = P.Add()
+        self.param = Parameter(  # assignment target, zero-initialised float32 of shape [2, 2, 2]
+            Tensor(np.zeros([2, 2, 2]).astype(np.float32)), name='param')
+
+    def construct(self, x, y):
+        add_res = self.add(x, y)
+        F.depend(add_res, F.assign(self.param, add_res))  # return value of depend is not bound
+        # Unlike TestOptAssignNet_1, the sum is returned directly with no further operator.
+        return add_res
+
+
+def test_opt_assign_output_1():  # NOTE(review): test_* name, pytest may collect this unmarked too - confirm
+    np.random.seed(0)  # fixed seed keeps the random inputs reproducible
+    input_x = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)
+    input_y = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)
+    # Run TestOptAssignNet_1 in graph mode on GPU with graph kernel enabled.
+    context.set_context(mode=context.GRAPH_MODE,
+                        enable_graph_kernel=True, device_target="GPU")
+    net = TestOptAssignNet_1()
+    result_open_gk = net(Tensor(input_x), Tensor(input_y))
+    # Reference run: a fresh net instance on the same inputs with graph kernel disabled.
+    context.set_context(mode=context.GRAPH_MODE,
+                        enable_graph_kernel=False, device_target="GPU")
+    net_beta = TestOptAssignNet_1()
+    result_close_gk = net_beta(Tensor(input_x), Tensor(input_y))
+    res = np.allclose(result_open_gk.asnumpy(), result_close_gk.asnumpy(), rtol=1.e-4, atol=1.e-7, equal_nan=True)
+    assert res
+
+
+def test_opt_assign_output_2():  # NOTE(review): test_* name, pytest may collect this unmarked too - confirm
+    np.random.seed(0)  # fixed seed keeps the random inputs reproducible
+    input_x = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)
+    input_y = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)
+    # Run TestOptAssignNet_2 in graph mode on GPU with graph kernel enabled.
+    context.set_context(mode=context.GRAPH_MODE,
+                        enable_graph_kernel=True, device_target="GPU")
+    net = TestOptAssignNet_2()
+    result_open_gk = net(Tensor(input_x), Tensor(input_y))
+    # Reference run: a fresh net instance on the same inputs with graph kernel disabled.
+    context.set_context(mode=context.GRAPH_MODE,
+                        enable_graph_kernel=False, device_target="GPU")
+    net_beta = TestOptAssignNet_2()
+    result_close_gk = net_beta(Tensor(input_x), Tensor(input_y))
+    res = np.allclose(result_open_gk.asnumpy(), result_close_gk.asnumpy(), rtol=1.e-4, atol=1.e-7, equal_nan=True)
+    assert res
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_opt_assign_gpu_1():  # marked pytest entry; only delegates to test_opt_assign_output_1
+    test_opt_assign_output_1()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_opt_assign_gpu_2():  # marked pytest entry; only delegates to test_opt_assign_output_2
+    test_opt_assign_output_2()