@@ -26,6 +26,7 @@
 #include "device/kernel_runtime_manager.h"
 #include "device/gpu/gpu_common.h"
 #include "common/utils.h"
+#include "device/gpu/gpu_memory_manager.h"
 
 namespace mindspore {
 namespace device {
@@ -36,26 +37,14 @@ bool GPUKernelRuntime::Init() {
   if (device_init_ == true) {
     return true;
   }
   auto ret = InitDevice();
   if (!ret) {
     MS_LOG(ERROR) << "InitDevice error.";
     return ret;
   }
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  // If use the dynamic memory pool, then alloc the first memory block to init.
-  if (context_ptr->enable_dynamic_mem_pool()) {
-    auto device_addr = AllocTensorMemDynamic(1);
-    if (!device_addr) {
-      MS_LOG(ERROR) << "Dynamic memory pool init error.";
-      return false;
-    }
-  } else {
-    MallocDeviceMemory();
-  }
+  mem_manager_ = std::make_shared<GPUMemoryManager>();
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->MallocDeviceMemory();
   const void *collective_handle_ = CollectiveInitializer::instance().collective_handle();
   bool collective_inited = CollectiveInitializer::instance().collective_inited();
   if (collective_inited && collective_handle_ != nullptr) {
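
[review note] The old Init() primed the dynamic pool by allocating a single byte and failing fast if that allocation did not succeed, and otherwise fell back to a one-shot device allocation. The new code delegates both concerns to the manager; presumably the pool priming now happens inside GPUMemoryManager::MallocDeviceMemory(), though the manager's implementation is not part of this diff, so that is an assumption.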
@@ -101,16 +90,6 @@ bool GPUKernelRuntime::InitDevice() {
   return true;
 }
 
-void GPUKernelRuntime::MallocDeviceMemory() {
-  // Need to reserve 20% space for dynamic memory
-  const float init_gpu_mem_ratio = 0.8;
-  size_t mem_size = FloatToSize(GPUMemoryAllocator::GetInstance().free_mem_size() * init_gpu_mem_ratio);
-  auto alloc_size =
-    GPUMemoryAllocator::GetInstance().AllocDeviceMem(mem_size, reinterpret_cast<void **>(&device_mem_base_));
-  device_mem_size_ = alloc_size;
-  static_mem_offset_ = device_mem_size_;
-}
-
 void GPUKernelRuntime::ReleaseDeviceRes() {
   // For dataset mode.
   if (GpuBufferMgr::GetInstance().IsInit()) {
@@ -122,39 +101,22 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
     CHECK_OP_RET_WITH_EXCEPT(GpuBufferMgr::GetInstance().Destroy(), "Could not destroy gpu data queue.");
   }
   GPUDeviceManager::GetInstance().ReleaseDevice();
-  if (device_mem_base_ != nullptr) {
-    if (!GPUMemoryAllocator::GetInstance().FreeDeviceMem(device_mem_base_)) {
-      MS_LOG(EXCEPTION) << "Could not free gpu device memory.";
-    }
-  }
-  GPUMemoryAllocator::GetInstance().ReleaseDeviceRes();
-}
-
-void GPUKernelRuntime::FreeHostMemory() { dynamic_mem_offset_ = 0; }
-
-void *GPUKernelRuntime::AllocTensorMemDynamic(size_t size) {
-  return GPUMemoryAllocator::GetInstance().AllocTensorMem(size);
-}
-
-void GPUKernelRuntime::FreeTensorMemDynamic(void *device_ptr) {
-  GPUMemoryAllocator::GetInstance().FreeTensorMem(device_ptr);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->FreeDeviceMemory();
 }
 
 void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
+  mem_manager_->ResetDynamicMemory();
   AssignStaticMemory(graph);
-  bool is_enable_mem_reuse = context_ptr->enable_mem_reuse();
   bool is_enable_dynamic_mem = context_ptr->enable_dynamic_mem_pool();
   if (is_enable_dynamic_mem) {
     // Use the dynamic memory pool.
     InitKernelRefCount(graph);
     InitKernelOutputAddress(graph);
-  } else if (is_enable_mem_reuse) {
-    // Use the memory reuse.
-    ReuseAssignDynamicMemory(graph);
   } else {
-    // Normal way.
     AssignDynamicMemory(graph);
   }
 }
@@ -179,32 +141,6 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph) {
   return ret;
 }
 
-uint8_t *GPUKernelRuntime::MallocStaticMem(size_t size, bool) {
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  if (context_ptr->enable_dynamic_mem_pool()) {
-    auto device_ptr = AllocTensorMemDynamic(size);
-    MS_EXCEPTION_IF_NULL(device_ptr);
-    return AddressOffset(device_ptr, 0);
-  }
-
-  auto align_size = GetCommonAlignSize(size);
-  if (static_mem_offset_ < align_size) {
-    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
-                      << "] static[" << total_static_size_ << "])"
-                      << " malloc [" << align_size << "] failed!";
-  }
-  auto offset = static_mem_offset_ - align_size;
-  if (dynamic_mem_offset_ > offset) {
-    MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "](dynamic[" << total_dynamic_size_
-                      << "] static[" << total_static_size_ << "])"
-                      << " malloc [" << align_size << "] failed!";
-  }
-  total_static_size_ += align_size;
-  static_mem_offset_ = offset;
-  return device_mem_base_ + offset;
-}
-
 void GPUKernelRuntime::InitKernelRefCount(const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
   MemReuseUtilPtr mem_reuse_util_ptr = std::make_shared<memreuse::MemReuseUtil>();
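
[review note] For context on the removals above: MallocStaticMem and MallocDeviceMemory together implemented a two-ended arena over a single device block. Dynamic allocations bump dynamic_mem_offset_ upward from 0, static allocations bump static_mem_offset_ downward from device_mem_size_, and an allocation fails once the two offsets would cross. A minimal standalone sketch of that scheme, with names mirroring the removed members (an illustration only, not the new GPUMemoryManager code, which this diff does not show):

    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>

    struct TwoEndedArena {
      uint8_t *base = nullptr;    // device_mem_base_
      size_t size = 0;            // device_mem_size_
      size_t dynamic_offset = 0;  // grows upward from 0
      size_t static_offset = 0;   // grows downward; initialized to size

      // Mirrors the removed MallocStaticMem: carve align_size bytes off the top,
      // failing if the static end would dip into the dynamic region.
      uint8_t *MallocStatic(size_t align_size) {
        if (static_offset < align_size) {
          throw std::runtime_error("out of memory");
        }
        size_t offset = static_offset - align_size;
        if (dynamic_offset > offset) {
          throw std::runtime_error("out of memory");
        }
        static_offset = offset;
        return base + offset;
      }
    };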
@@ -273,6 +209,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
   MS_EXCEPTION_IF_NULL(kernel_inputs);
   MS_EXCEPTION_IF_NULL(kernel_workspaces);
   MS_EXCEPTION_IF_NULL(kernel_outputs);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) {
     auto device_address = AnfAlgo::GetPrevNodeOutputAddr(kernel, i);
     MS_EXCEPTION_IF_NULL(device_address);
@@ -290,7 +227,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
     MS_EXCEPTION_IF_NULL(device_address);
     auto device_ptr = device_address->ptr_;
     if (device_ptr == nullptr) {
-      device_ptr = AllocTensorMemDynamic(output_sizes[i]);
+      device_ptr = mem_manager_->AllocTensorMemDynamic(output_sizes[i]);
       MS_EXCEPTION_IF_NULL(device_ptr);
       device_address->ptr_ = device_ptr;
     }
@@ -307,7 +244,7 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
       kernel_workspaces->emplace_back(nullptr);
       continue;
     }
-    auto device_ptr = AllocTensorMemDynamic(workspace_sizes[i]);
+    auto device_ptr = mem_manager_->AllocTensorMemDynamic(workspace_sizes[i]);
     MS_EXCEPTION_IF_NULL(device_ptr);
     kernel::AddressPtr workspace = std::make_shared<kernel::Address>();
     MS_EXCEPTION_IF_NULL(workspace);
@@ -333,6 +270,7 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph
 
 void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   // The reference count of communication kernel input is not 0.
   if (communication_op_input_ref_count_ != 0) {
     MS_LOG(ERROR) << "The reference count of communication kernel input is not 0.";
@@ -354,7 +292,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN
     addr_size.emplace_back(device_address.get(), output_size);
   }
-  auto device_mem_ptr = AllocTensorMemDynamic(total);
+  auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total);
   MS_EXCEPTION_IF_NULL(device_mem_ptr);
   for (const auto &iter : addr_size) {
     MS_EXCEPTION_IF_NULL(iter.first);
@@ -366,6 +304,7 @@ void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfN
 
 void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   // The reference count of communication kernel output is not 0.
   if (communication_op_output_ref_count_ != 0) {
     MS_LOG(ERROR) << "The reference count of communication kernel output is not 0.";
@@ -389,7 +328,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf
     addr_size.emplace_back(device_address.get(), output_sizes[i]);
   }
-  auto device_mem_ptr = AllocTensorMemDynamic(total);
+  auto device_mem_ptr = mem_manager_->AllocTensorMemDynamic(total);
   MS_EXCEPTION_IF_NULL(device_mem_ptr);
   for (const auto &iter : addr_size) {
     MS_EXCEPTION_IF_NULL(iter.first);
@@ -402,6 +341,7 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf
 void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
                                             const AddressPtrList &kernel_workspaces) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   auto cnode = kernel->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
   // Free the input of kernel by reference count.
@@ -421,7 +361,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
       auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i);
       MS_EXCEPTION_IF_NULL(device_address);
       MS_EXCEPTION_IF_NULL(device_address->ptr_);
-      FreeTensorMemDynamic(device_address->ptr_);
+      mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
       device_address->ptr_ = nullptr;
     }
   }
@@ -432,7 +372,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
     auto workspace = kernel_workspaces[i];
     if (workspace != nullptr) {
       MS_EXCEPTION_IF_NULL(workspace->addr);
-      FreeTensorMemDynamic(workspace->addr);
+      mem_manager_->FreeTensorMemDynamic(workspace->addr);
       workspace->addr = nullptr;
     }
   }
@@ -441,6 +381,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
 void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr &kernel, size_t input_idx,
                                                      bool *is_communication_op) {
   MS_EXCEPTION_IF_NULL(kernel);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   // The inputs memory of communication kernel is one piece memory, need release together.
   if (AnfAlgo::GetCNodeName(kernel) == kAllReduceOpName) {
     communication_op_input_ref_count_--;
@@ -448,7 +389,7 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr
       auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0);
       MS_EXCEPTION_IF_NULL(device_address);
       MS_EXCEPTION_IF_NULL(device_address->ptr_);
-      FreeTensorMemDynamic(device_address->ptr_);
+      mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
       device_address->ptr_ = nullptr;
     }
     *is_communication_op = true;
@@ -470,19 +411,12 @@ void GPUKernelRuntime::FreeCommunicationOpDynamicRes(const mindspore::AnfNodePtr
       auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_input.first, 0);
       MS_EXCEPTION_IF_NULL(device_address);
       MS_EXCEPTION_IF_NULL(device_address->ptr_);
-      FreeTensorMemDynamic(device_address->ptr_);
+      mem_manager_->FreeTensorMemDynamic(device_address->ptr_);
       device_address->ptr_ = nullptr;
     }
     *is_communication_op = true;
   }
 }
-
-void GPUKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) {
-  auto device_ptr = AllocTensorMemDynamic(size);
-  MS_EXCEPTION_IF_NULL(device_ptr);
-  address->ptr_ = device_ptr;
-  address->mem_dynamic_alloc_ = true;
-}
 }  // namespace gpu
 }  // namespace device
 }  // namespace mindspore
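
[review note] Net effect across the hunks above: the runtime's raw memory helpers (MallocDeviceMemory, MallocStaticMem, MallocOpMemory, FreeHostMemory, AllocTensorMemDynamic, FreeTensorMemDynamic) move behind the mem_manager_ member, and every entry point that touches it gains an MS_EXCEPTION_IF_NULL(mem_manager_) guard. The new header device/gpu/gpu_memory_manager.h is not shown in this diff, so the interface below is inferred from the call sites only; the MemoryManager base class name and the exact signatures are assumptions.

    // Hypothetical reconstruction from the call sites in this diff, not the real header.
    class GPUMemoryManager : public MemoryManager {  // base class name assumed
     public:
      void MallocDeviceMemory();                     // Init(): reserve the device arena
      void FreeDeviceMemory();                       // ReleaseDeviceRes(): hand it back
      void ResetDynamicMemory();                     // AssignMemory(): rewind the dynamic offset
      void *AllocTensorMemDynamic(size_t size);      // outputs, workspaces, fused comm buffers
      void FreeTensorMemDynamic(void *device_ptr);   // ref-count driven frees
    };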