@@ -20,6 +20,7 @@
#include "device/gpu/gpu_stream_assign.h"
#include "pre_activate/common/optimizer.h"
#include "pre_activate/common/pass_manager.h"
#include "pre_activate/common/helper.h"
#include "pre_activate/pass/communication_op_fusion.h"
#include "device/kernel_runtime_manager.h"
#include "predict/predict.h"
@@ -69,6 +70,7 @@ void GPUSession::AllocateMemory(KernelGraph *kernel_graph) const {
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
opt::RemoveNopNode(kernel_graph);
runtime_instance->AssignMemory(kernel_graph);
}
@@ -77,6 +79,7 @@ void GPUSession::RunOpAllocateMemory(const std::vector<tensor::TensorPtr> &input
MS_EXCEPTION_IF_NULL(kernel_graph);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance);
opt::RemoveNopNode(kernel_graph);
runtime_instance->RunOpAssignMemory(input_tensors, kernel_graph);
}
@@ -102,6 +105,8 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
Optimize(graph);
// Assign CUDA streams
AssignStream(graph);
// Remove NoOp from execution graph
opt::HideNopNode(graph.get());
// Build kernel if node is cnode
BuildKernel(graph);
// Set graph execution order before memory alloc, ensure that memory alloc is according to the reorder graph