Fix execution-order bug in monad handling

pull/12281/head
zengzitao 4 years ago
parent 2f5483ebb3
commit ef3507e973

@ -88,11 +88,14 @@ std::map<size_t, AnfNodePtr> FindAssignAndOutputVal(const CNodePtr &fg_cnode) {
return output_replace_map;
}
bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr &param_user) {
bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr &param_user, const AnfNodePtr &getitem) {
auto mng = AnfAlgo::GetCNodeFuncGraphPtr(gk_node)->manager();
MS_EXCEPTION_IF_NULL(mng);
bool result = false;
auto IncludeUser = [&result, &gk_node](const AnfNodePtr &node) {
auto IncludeUser = [&result, &gk_node, &getitem](const AnfNodePtr &node) {
if (node == getitem) {
return EXCLUDE;
}
if (node == gk_node) {
result = true;
return EXCLUDE;
@ -103,23 +106,23 @@ bool HasPathToParamUser(const AnfNodePtr &gk_node, const AnfNodePtr &param_user)
return result;
}
void KeepExecOrder(const FuncGraphPtr &func_graph, const AnfNodePtr &gk_node, const AnfNodePtr &par_user_node,
void KeepExecOrder(const FuncGraphPtr &func_graph, const AnfNodePtr &getitem, const AnfNodePtr &assign_to_node,
const FuncGraphManagerPtr &mng) {
// Insert update_state_node, need mount a monad node.
auto u = NewValueNode(kUMonad);
u->set_abstract(kUMonad->ToAbstract());
AnfNodePtrList update_state_inputs = {NewValueNode(prim::kPrimUpdateState), u, gk_node};
AnfNodePtrList update_state_inputs = {NewValueNode(prim::kPrimUpdateState), u, getitem};
auto update_state_node = func_graph->NewCNode(update_state_inputs);
update_state_node->set_abstract(gk_node->abstract());
update_state_node->set_abstract(getitem->abstract());
func_graph->AddNode(update_state_node);
// Insert load_node
AnfNodePtrList load_inputs = {NewValueNode(prim::kPrimLoad), par_user_node, update_state_node};
AnfNodePtrList load_inputs = {NewValueNode(prim::kPrimLoad), assign_to_node, update_state_node};
auto load_node = func_graph->NewCNode(load_inputs);
load_node->set_abstract(par_user_node->abstract());
load_node->set_abstract(assign_to_node->abstract());
func_graph->AddNode(load_node);
mng->Replace(gk_node, par_user_node);
mng->Replace(getitem, load_node);
}
int64_t GetitemIndex(const AnfNodePtr &getitem) {
@ -136,17 +139,18 @@ void UpdateUsersOfGraphKernel(const FuncGraphPtr &func_graph, const AnfNodePtr &
auto getitem = getitem_iter.first;
if (GetitemIndex(getitem) != removed_index) continue;
auto getitem_users = mng->node_users()[getitem]; // get a copy of getitem's users before replacing
mng->Replace(getitem, assign_to);
for (const auto &getitem_user_iter : getitem_users) {
auto getitem_user = getitem_user_iter.first;
// 1. A previous pass `DependFormater` has ensured that all data users are directly link to its
// input, without Depend node.
// 2. If the `cnode` has another path to the getitem_user, it's unnecessary to add a ControlDepend.
if (!AnfAlgo::IsRealKernel(getitem_user) || HasPathToParamUser(cnode, getitem_user)) {
// 2. If the `cnode` has another path to the getitem_user, it's unnecessary to add update_state and load node to
// keep exec_order.
if (!AnfAlgo::IsRealKernel(getitem_user) || HasPathToParamUser(cnode, getitem_user, getitem)) {
mng->Replace(getitem, assign_to);
continue;
}
KeepExecOrder(func_graph, cnode, getitem_user, mng);
KeepExecOrder(func_graph, getitem, assign_to, mng);
}
break;
}

@ -0,0 +1,102 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
from mindspore import Tensor
from mindspore.nn import Cell
import mindspore.ops.operations as P
from mindspore.ops import functional as F
from mindspore.common.parameter import Parameter
class TestOptAssignNet_1(Cell):
    """Net that writes its Add result into a Parameter, then returns ReduceMax of it.

    Exercises the graph-kernel assign-output optimization: the Assign side
    effect must keep its execution order relative to the ReduceMax consumer.
    """

    def __init__(self):
        super(TestOptAssignNet_1, self).__init__()
        self.add = P.Add()
        self.reduce_max = P.ReduceMax()
        initial = Tensor(np.zeros([2, 2, 2]).astype(np.float32))
        self.param = Parameter(initial, name='param')

    def construct(self, x, y):
        total = self.add(x, y)
        # NOTE(review): the result of F.depend is discarded here — confirm the
        # Assign side effect is still sequenced via monad handling in graph mode.
        F.depend(total, F.assign(self.param, total))
        return self.reduce_max(total)
class TestOptAssignNet_2(Cell):
    """Net that writes its Add result into a Parameter and returns that result.

    Unlike TestOptAssignNet_1 there is no reduction after the Assign, so the
    assigned value itself is the net output.
    """

    def __init__(self):
        super(TestOptAssignNet_2, self).__init__()
        self.add = P.Add()
        initial = Tensor(np.zeros([2, 2, 2]).astype(np.float32))
        self.param = Parameter(initial, name='param')

    def construct(self, x, y):
        total = self.add(x, y)
        # NOTE(review): F.depend's return value is intentionally not re-bound in
        # the original — verify the Assign still executes via monad ordering.
        F.depend(total, F.assign(self.param, total))
        return total
def test_opt_assign_output_1():
    """Run TestOptAssignNet_1 with graph kernel enabled and disabled and
    check that both executions produce numerically close outputs."""
    np.random.seed(0)
    lhs = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)
    rhs = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)

    # Graph-kernel path.
    context.set_context(mode=context.GRAPH_MODE,
                        enable_graph_kernel=True, device_target="GPU")
    out_gk = TestOptAssignNet_1()(Tensor(lhs), Tensor(rhs))

    # Reference path without graph kernel.
    context.set_context(mode=context.GRAPH_MODE,
                        enable_graph_kernel=False, device_target="GPU")
    out_ref = TestOptAssignNet_1()(Tensor(lhs), Tensor(rhs))

    assert np.allclose(out_gk.asnumpy(), out_ref.asnumpy(),
                       rtol=1.e-4, atol=1.e-7, equal_nan=True)
def test_opt_assign_output_2():
    """Run TestOptAssignNet_2 with graph kernel enabled and disabled and
    check that both executions produce numerically close outputs."""
    np.random.seed(0)
    lhs = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)
    rhs = np.random.normal(0, 1, [2, 2, 2]).astype(np.float32)

    # Graph-kernel path.
    context.set_context(mode=context.GRAPH_MODE,
                        enable_graph_kernel=True, device_target="GPU")
    out_gk = TestOptAssignNet_2()(Tensor(lhs), Tensor(rhs))

    # Reference path without graph kernel.
    context.set_context(mode=context.GRAPH_MODE,
                        enable_graph_kernel=False, device_target="GPU")
    out_ref = TestOptAssignNet_2()(Tensor(lhs), Tensor(rhs))

    assert np.allclose(out_gk.asnumpy(), out_ref.asnumpy(),
                       rtol=1.e-4, atol=1.e-7, equal_nan=True)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_opt_assign_gpu_1():
    # GPU CI entry point: markers select the target platform/level; the
    # actual comparison logic lives in test_opt_assign_output_1.
    test_opt_assign_output_1()
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_opt_assign_gpu_2():
    # GPU CI entry point: markers select the target platform/level; the
    # actual comparison logic lives in test_opt_assign_output_2.
    test_opt_assign_output_2()
Loading…
Cancel
Save