!7884 Pynative support dynamic shape op

Merge pull request !7884 from JoyLvliang/pynative-support-dynamic-shape
pull/7884/MERGE
Committed by mindspore-ci-bot via Gitee
commit 57fc8f45ae

@@ -390,6 +390,8 @@ void AscendSession::RunOpImpl(const OpRunInfo &op_run_info, const GraphInfo &gra
MS_LOG(INFO) << "Run op " << op_run_info.op_name << " start!";
// malloc mem
RunOpMemoryAlloc(op_run_info.value, input_tensors, graph.get());
// Build dynamic kernel
BuildDynamicKernel(graph);
// load input data to device
LoadInputData(graph, input_tensors);
// run op
@@ -510,6 +512,17 @@ void AscendSession::BuildKernel(const std::shared_ptr<KernelGraph> &kernel_graph
MS_LOG(INFO) << "Finish!";
}
void AscendSession::BuildDynamicKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const {
MS_LOG(INFO) << "Start!";
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
if (!runtime_instance->GenDynamicKernel(kernel_graph.get())) {
MS_LOG(DEBUG) << "Graph:" << kernel_graph->graph_id() << " failed to generate dynamic kernel!";
}
MS_LOG(INFO) << "Finish!";
}
void AscendSession::MemoryAlloc(KernelGraph *kernel_graph) const {
MS_LOG(INFO) << "Start!";
MS_EXCEPTION_IF_NULL(kernel_graph);

@@ -90,6 +90,7 @@ class AscendSession : public SessionBasic {
void RunOpAdjustKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void AssignStream(NotNull<KernelGraphPtr> kernel_graph) const;
void BuildKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void BuildDynamicKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const;
void MemoryAlloc(KernelGraph *kernel_graph) const;
void RunOpMemoryAlloc(const ValuePtr &pre_output_value, const std::vector<tensor::TensorPtr> &input_tensors,
KernelGraph *kernel_graph) const;

@@ -1315,6 +1315,8 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const OpRunInf
const std::vector<tensor::TensorPtr> &input_tensors,
const std::vector<int> &tensors_mask) {
auto graph = std::make_shared<KernelGraph>();
graph->set_graph_id(run_op_graph_id_);
run_op_graph_id_++;
std::vector<AnfNodePtr> inputs;
// set input[0]
PrimitivePtr op_prim = op_run_info.primitive;
@@ -1343,9 +1345,12 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const OpRunInf
MS_EXCEPTION_IF_NULL(cnode);
// set abstract,which include inferred shapes and types
cnode->set_abstract(op_run_info.abstract);
// get output dynamic shape info
AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(op_run_info.is_dynamic_shape), cnode);
// set execution order
std::vector<CNodePtr> exe_order = {cnode};
graph->set_execution_order(exe_order);
graph->UpdateGraphDynamicAttr();
// set output
CreateOutputNode(cnode, graph);
graph->SetInputNodes();
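
The hunk above gives every single-op graph its own id (run_op_graph_id_), tags the CNode with kAttrOutputIsDynamicShape taken from OpRunInfo, and lets UpdateGraphDynamicAttr mark the whole graph as dynamic. A typical op whose output shape is only known at run time is Unique, which the new ST case at the end of this PR exercises; the sketch below calls the primitive directly in PyNative mode instead of wrapping it in a Cell as the test does, which is an assumption of the sketch, not part of the change.

import numpy as np
import mindspore.context as context
from mindspore import Tensor
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P

# Sketch: a data-dependent output shape in PyNative mode (see the Unique ST case below).
context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
unique_vals, inverse_idx = P.Unique()(x)  # length of unique_vals depends on the data
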

@@ -50,12 +50,13 @@ struct OpRunInfo {
PrimitivePtr primitive;
AbstractBasePtr abstract;
ValuePtr value = nullptr;
bool is_dynamic_shape = false;
};
using OpRunInfoPtr = std::shared_ptr<OpRunInfo>;
class Executor;
class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
public:
SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) {
SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0), run_op_graph_id_(0) {
#if !defined(_WIN32) && !defined(_WIN64)
debugger_ = nullptr;
#endif
@@ -182,6 +183,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
CallBackFunc summary_callback_;
static GraphId graph_sum_;
uint32_t device_id_;
uint32_t run_op_graph_id_;
std::shared_ptr<Executor> executor_;
#if !defined(_WIN32) && !defined(_WIN64)
std::shared_ptr<Debugger> debugger_;

@@ -53,8 +53,9 @@ struct OpExecInfo {
std::string prim_id;
PrimitivePyPtr py_primitive;
AbstractBasePtr abstract;
ValuePtr value = nullptr;
bool is_dynamic_shape = false;
ValuePtr value = nullptr;
py::list op_inputs;
py::dict op_attrs;
std::vector<bool> inputs_mask;

@@ -758,6 +758,13 @@ py::tuple PynativeExecutor::RunOpInner(const py::args &args) {
cnode->set_abstract(op_exec_info->abstract);
}
// get output dynamic shape info
MS_EXCEPTION_IF_NULL(op_exec_info->abstract);
auto abstract_info = op_exec_info->abstract->ToString();
if (abstract_info.find("-1") != string::npos) {
op_exec_info->is_dynamic_shape = true;
}
op_exec_info->inputs_mask = op_masks;
MS_EXCEPTION_IF_NULL(op_exec_info);
if (op_exec_info->abstract != nullptr) {
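
The lines added to RunOpInner above mark an op as dynamic when the string form of its inferred abstract contains "-1", the conventional marker for an unknown dimension. Below is a plain-Python sketch of that substring check; the exact abstract string format shown is an assumption, and only the test itself mirrors the C++ logic.

# Sketch of the detection idea: "-1" in the inferred shape string means dynamic.
def is_dynamic_shape(abstract_str):
    # Assumption: unknown dimensions are rendered as -1 in the abstract's string form.
    return "-1" in abstract_str

assert is_dynamic_shape("Tensor(shape=(-1,), dtype=Int32)")
assert not is_dynamic_shape("Tensor(shape=(6,), dtype=Int32)")
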
@@ -1301,7 +1308,7 @@ py::object PynativeExecutor::RunOpInMs(const OpExecInfoPtr &op_exec_info, Pynati
// get graph info for checking it whether existing in the cache
std::string graph_info = GetSingleOpGraphInfo(op_exec_info, input_tensors);
session::OpRunInfo op_run_info = {op_exec_info->op_name, op_exec_info->py_primitive, op_exec_info->abstract,
op_exec_info->value};
op_exec_info->value, op_exec_info->is_dynamic_shape};
session->BuildOp(&op_run_info, graph_info, input_tensors, tensors_mask);
EraseValueNodeTensor(tensors_mask, &input_tensors);
VectorRef outputs;

@@ -358,16 +358,13 @@ bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(INFO) << "Generate node:" << cnode->fullname_with_scope() << " dynamic kernel";
auto kernel_mod = AnfAlgo::GetKernelMod(cnode);
MS_EXCEPTION_IF_NULL(kernel_mod);
auto dynamic_kernel = kernel_mod->GenDynamicKernel(cnode, stream_);
MS_EXCEPTION_IF_NULL(dynamic_kernel);
dynamic_kernel->Initialize();
dynamic_kernels.emplace_back(dynamic_kernel);
}
auto ret = graph_dynamic_kernel_map_.try_emplace(graph->graph_id(), dynamic_kernels);
if (!ret.second) {
MS_LOG(ERROR) << "Graph:" << graph->graph_id() << " already generator executor";
return false;
}
graph_dynamic_kernel_map_[graph->graph_id()] = dynamic_kernels;
MS_LOG(INFO) << "GenDynamicKernel end";
return true;
}
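
The hunk above also changes how the generated dynamic kernels are cached: instead of failing when a list already exists for the graph id (the removed try_emplace branch), the entry is now overwritten. A plain-Python sketch of that cache-update change follows; the map and function names are only illustrative.

# Sketch: the per-graph dynamic-kernel cache is replaced unconditionally.
graph_dynamic_kernel_map = {}

def cache_dynamic_kernels(graph_id, dynamic_kernels):
    # Old (removed) behavior: error out if graph_id was already present.
    # New behavior: overwrite the cached list for this graph_id.
    graph_dynamic_kernel_map[graph_id] = dynamic_kernels
    return True
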

@@ -28,7 +28,7 @@ void DynamicKernel::Initialize() {
MS_LOG(INFO) << "Init Start";
is_dynamic_shape_ = AnfAlgo::IsDynamicShape(cnode_ptr_);
if (!is_dynamic_shape_) {
MS_LOG(INFO) << "cnode is not dynamic shape:" << cnode_ptr_->fullname_with_scope();
MS_LOG(DEBUG) << "cnode is not dynamic shape:" << cnode_ptr_->fullname_with_scope();
return;
}
@@ -37,7 +37,7 @@ void DynamicKernel::Initialize() {
auto have_depends = AnfAlgo::HasNodeAttr(kDynamicShapeDepends, cnode_ptr_);
if (!have_depends) {
MS_LOG(WARNING) << "No dynamic_shape_depends found";
MS_LOG(DEBUG) << "No dynamic_shape_depends found";
return;
}
MS_LOG(INFO) << "Have depends";

@@ -799,18 +799,38 @@ void KernelRuntime::GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList
bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) {
auto &kernels = graph.execution_order();
for (const auto &kernel : kernels) {
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod);
AddressPtrList kernel_inputs;
AddressPtrList kernel_workspaces;
AddressPtrList kernel_outputs;
GenLaunchArgs(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
auto ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
if (!ret) {
MS_LOG(ERROR) << "Launch kernel failed.";
return false;
std::vector<DynamicKernelPtr> dynamic_kernel_list;
auto iter = graph_dynamic_kernel_map_.find(graph.graph_id());
if (iter != graph_dynamic_kernel_map_.end()) {
dynamic_kernel_list = iter->second;
}
if (!dynamic_kernel_list.empty() && dynamic_kernel_list.size() != kernels.size()) {
MS_LOG(EXCEPTION) << "The size of dynamic kernels " << dynamic_kernel_list.size()
<< " should be equal to the size of kernels " << kernels.size();
}
for (size_t i = 0; i < kernels.size(); ++i) {
if (!dynamic_kernel_list.empty() && dynamic_kernel_list[i] != nullptr &&
dynamic_kernel_list[i]->is_dynamic_shape()) {
dynamic_kernel_list[i]->InferShape();
dynamic_kernel_list[i]->UpdateArgs();
dynamic_kernel_list[i]->Execute();
if (!SyncStream()) {
MS_LOG(ERROR) << "SyncStream failed";
return false;
}
dynamic_kernel_list[i]->PostExecute();
} else {
auto kernel_mod = AnfAlgo::GetKernelMod(kernels[i]);
MS_EXCEPTION_IF_NULL(kernel_mod);
AddressPtrList kernel_inputs;
AddressPtrList kernel_workspaces;
AddressPtrList kernel_outputs;
GenLaunchArgs(*kernel_mod, kernels[i], &kernel_inputs, &kernel_workspaces, &kernel_outputs);
auto ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
if (!ret) {
MS_LOG(ERROR) << "Launch kernel failed.";
return false;
}
}
}
return true;
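
The rewritten LaunchKernelMod above dispatches per kernel: a dynamic kernel runs InferShape, UpdateArgs, Execute, a stream sync, then PostExecute, while a static kernel keeps the original Launch path; the size check guards the one-to-one pairing of the two lists. The plain-Python control-flow sketch below uses hypothetical helper objects and method names; only the branching mirrors the C++ code.

# Control-flow sketch of the per-kernel dispatch (hypothetical helper objects).
def launch_kernels(kernels, dynamic_kernels, sync_stream):
    if dynamic_kernels and len(dynamic_kernels) != len(kernels):
        raise RuntimeError("dynamic kernel list must match kernel list in size")
    for i, kernel in enumerate(kernels):
        dyn = dynamic_kernels[i] if dynamic_kernels else None
        if dyn is not None and dyn.is_dynamic_shape():
            dyn.infer_shape()      # re-infer output shapes from the actual inputs
            dyn.update_args()      # rebuild launch arguments for the new shapes
            dyn.execute()
            if not sync_stream():  # dynamic ops sync before post-processing
                return False
            dyn.post_execute()
        else:
            if not kernel.launch():  # static path: behavior unchanged
                return False
    return True
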

@@ -0,0 +1,74 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.nn as nn
from mindspore import Tensor, Parameter, context
from mindspore.nn import TrainOneStepCell
from mindspore.nn.optim import FTRL, LazyAdam
from mindspore.ops import operations as P
context.set_context(enable_sparse=True,
mode=context.PYNATIVE_MODE,
device_target="Ascend")
class NetWithSparseGatherV2(nn.Cell):
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
self.weight2 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight2")
self.axis = 1
self.gather = P.SparseGatherV2()
def construct(self, indices, label):
return self.gather(self.weight1, indices, self.axis) + self.weight2
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_pynative_ftrl_net():
indices = Tensor(np.array([0, 0, 1]).astype(np.int32))
label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
net = NetWithSparseGatherV2()
optimizer = FTRL(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, loss_scale=2.0)
optimizer.target = 'Ascend'
train_network = TrainOneStepCell(net, optimizer)
output = train_network(indices, label)
np.allclose(output.asnumpy(), np.array([[[2, 2]], [[2, 2]], [[2, 2]]]))
np.allclose(net.weight1.asnumpy(), np.array([[[0.7884067, 0.7884067]],
[[0.68213105, 0.68213105]],
[[1.0, 1.0]]]))
np.allclose(net.weight2.asnumpy(), np.array([[[0.6821311, 0.6821311]],
[[0.6821311, 0.6821311]],
[[0.6821311, 0.6821311]]]))
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_pynative_lazy_adam_net():
indices = Tensor(np.array([0, 0, 1]).astype(np.int32))
label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
net = NetWithSparseGatherV2()
optimizer = LazyAdam(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, loss_scale=2.0)
optimizer.target = 'Ascend'
train_network = TrainOneStepCell(net, optimizer)
output = train_network(indices, label)
np.allclose(output.asnumpy(), np.array([[[2, 2]], [[2, 2]], [[2, 2]]]))
np.allclose(net.weight1.asnumpy(), np.array([[[0.9, 0.9]], [[0.9, 0.9]], [[1.0, 1.0]]]))
np.allclose(net.weight2.asnumpy(), np.array([[[0.9, 0.9]], [[0.9, 0.9]], [[0.9, 0.9]]]))

@@ -0,0 +1,44 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.unique = P.Unique()
def construct(self, x):
return self.unique(x)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_pynative_unqiue():
x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
unique = Net()
output = unique(x)
expect1 = np.array([1, 2, 3])
expect2 = np.array([0, 0, 1, 1, 2, 2])
assert (output[0].asnumpy() == expect1).all()
assert (output[1].asnumpy() == expect2).all()
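
The Unique case above is the canonical dynamic-shape op: the length of its first output equals the number of distinct input values and is only known at run time. A plain numpy illustration of the same contract (numpy is used here only for comparison and is not part of the change):

# numpy analogue: the size of the unique output depends on the data.
import numpy as np
x = np.array([1, 1, 2, 2, 3, 3], dtype=np.int32)
uniques, inverse = np.unique(x, return_inverse=True)
print(uniques)   # [1 2 3]        -> length known only at run time
print(inverse)   # [0 0 1 1 2 2]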