!6805 fix opencl mem leak

Merge pull request !6805 from wandongdong/r1.0
pull/6805/MERGE
mindspore-ci-bot committed by Gitee · 5 years ago
commit 6caf53008a

@@ -133,11 +133,10 @@ int ActivationOpenClKernel::GetImageSize(size_t idx, std::vector<size_t> *img_si
   return RET_OK;
 }
-kernel::LiteKernel *OpenClActivationFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                                      const std::vector<lite::Tensor *> &outputs,
-                                                      OpParameter *opParameter, const lite::InnerContext *ctx,
-                                                      const kernel::KernelKey &desc,
-                                                      const mindspore::lite::PrimitiveC *primitive) {
+kernel::LiteKernel *OpenClActivationKernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
+                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,
+                                                  const mindspore::lite::PrimitiveC *primitive) {
   if (inputs.empty()) {
     MS_LOG(ERROR) << "Input data size must be greater than 0, but your size is " << inputs.size();
     return nullptr;
@@ -160,5 +159,6 @@ kernel::LiteKernel *OpenClActivationFp32KernelCreator(const std::vector<lite::Te
   }
   return kernel;
 }
-REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Activation, OpenClActivationFp32KernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Activation, OpenClActivationKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Activation, OpenClActivationKernelCreator)
 }  // namespace mindspore::kernel
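Note on this hunk: the creator drops the Fp32 suffix and is registered for both kNumberTypeFloat16 and kNumberTypeFloat32, because the kernel resolves its precision at Init() from runtime state rather than from the registry key. A standalone sketch of that pattern (all names below are hypothetical stand-ins, not MindSpore's API):

```cpp
#include <functional>
#include <map>
#include <memory>

// One factory backs two registry keys; precision is decided at Init()
// from runtime state, mirroring ocl_runtime_->GetFp16Enable().
struct ActivationKernel {
  bool use_fp16 = false;
  int Init(bool fp16_enabled) {
    use_fp16 = fp16_enabled;  // precision chosen here, not at registration
    return 0;
  }
};

enum DataType { kFloat16, kFloat32 };
using KernelFactory = std::function<std::unique_ptr<ActivationKernel>()>;

int main() {
  std::map<DataType, KernelFactory> registry;
  KernelFactory shared_creator = [] { return std::make_unique<ActivationKernel>(); };
  registry[kFloat16] = shared_creator;  // mirrors the two REG_KERNEL lines
  registry[kFloat32] = shared_creator;
  auto kernel = registry[kFloat16]();
  return kernel->Init(/*fp16_enabled=*/true);
}
```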

@@ -38,11 +38,10 @@ constexpr size_t CI_TILE = C4NUM;
 constexpr size_t CO_TILE = C4NUM;
 int ConvolutionOpenCLKernel::Init() {
-  static int init_count = 0;
+  unsigned int init_count = 0;
   auto allocator = ocl_runtime_->GetAllocator();
   auto param = reinterpret_cast<ConvParameter *>(op_parameter_);
   std::set<std::string> build_options;
-  init_count++;
   use_fp16_ = ocl_runtime_->GetFp16Enable();
   if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) {
@@ -70,23 +69,48 @@ int ConvolutionOpenCLKernel::Init() {
   TILES_XY_ = TILES_X_ * TILES_Y_;
   use_winograd_ = UseWinograd4x4To6x6();
+  std::vector<int> vpara{IH_, IW_, OH_, OW_, KH_, KW_, CI_SLICES_, CO_SLICES_,
+                         param->stride_h_,
+                         param->stride_w_,
+                         param->pad_u_,
+                         param->pad_l_,
+                         param->pad_d_,
+                         param->pad_r_};
+  std::string code_id;
+  for (auto &iv : vpara) {
+    code_id += "_" + std::to_string(iv);
+  }
+  // gen code id: bit(OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE, format(nhwc4, nc4hw4), act_type(relu6, relu),
+  // padTop||padBottom, check_ow, fp16)
+  init_count |= ((unsigned int)use_fp16_);
+  init_count |= ((unsigned int)(OW_ % 2 == 1)) << 1;
+  init_count |= ((unsigned int)(param->pad_u_ || param->pad_d_)) << 2;
+  init_count |= ((unsigned int)(param->act_type_ == ActType_Relu)) << 3;
+  init_count |= ((unsigned int)(param->act_type_ == ActType_Relu6)) << 4;
+  init_count |= ((unsigned int)(op_format_ == schema::Format_NHWC4)) << 5;
+  init_count |= ((unsigned int)(op_format_ == schema::Format_NC4HW4)) << 6;
+  init_count |= ((unsigned int)(OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE)) << 7;
+  code_id += "_" + std::to_string(init_count);
   // build kernel
   if (use_winograd_) {
     MS_LOG(DEBUG) << "use winograd";
     std::string program_name;
-    program_name = "Winograd4x4To36" + std::to_string(init_count);
+    program_name = "Winograd4x4To36" + code_id;
     ocl_runtime_->LoadSource(program_name, CodeGenWinograd4x4To36());
     ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options);
-    program_name = "WinogradConvolution" + std::to_string(init_count);
+    program_name = "WinogradConvolution" + code_id;
     ocl_runtime_->LoadSource(program_name, CodeGenWinogradConvolution());
     ocl_runtime_->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options);
-    program_name = "Winograd36To4x4" + std::to_string(init_count);
+    program_name = "Winograd36To4x4" + code_id;
     ocl_runtime_->LoadSource(program_name, CodeGenWinograd36To4x4());
     ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options);
   } else {
-    std::string program_name = "convolution" + std::to_string(init_count);
+    std::string program_name = "convolution" + code_id;
     std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4();
     ocl_runtime_->LoadSource(program_name, source);
     ocl_runtime_->BuildKernel(kernel_conv_, program_name, "Convolution", build_options);
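This hunk is the heart of the fix. Previously program_name was suffixed with a static init_count that incremented on every Init(), so every convolution kernel instance got a uniquely named program: identical configurations could never share a compiled program, and each build stayed resident in the runtime's program cache (the leak). The new code derives a deterministic code_id from the shape parameters plus a bitmask of build options, so LoadSource()/BuildKernel() can hit the cache. A minimal standalone sketch of the key construction (flag order follows the comment in the hunk; parameter names here are stand-ins):

```cpp
#include <iostream>
#include <string>
#include <vector>

// Standalone sketch: shape parameters are appended as "_<value>" and the
// boolean build options are packed into the bits of one unsigned int, so
// two convolutions with identical configuration produce the same program
// name and can share one compiled OpenCL program.
std::string MakeConvCodeId(const std::vector<int> &shape_params, bool use_fp16,
                           bool ow_odd, bool pad_top_or_bottom, bool relu,
                           bool relu6, bool nhwc4, bool nc4hw4, bool ow_fits) {
  std::string code_id;
  for (int v : shape_params) code_id += "_" + std::to_string(v);
  unsigned int flags = 0;
  flags |= static_cast<unsigned int>(use_fp16);
  flags |= static_cast<unsigned int>(ow_odd) << 1;
  flags |= static_cast<unsigned int>(pad_top_or_bottom) << 2;
  flags |= static_cast<unsigned int>(relu) << 3;
  flags |= static_cast<unsigned int>(relu6) << 4;
  flags |= static_cast<unsigned int>(nhwc4) << 5;
  flags |= static_cast<unsigned int>(nc4hw4) << 6;
  flags |= static_cast<unsigned int>(ow_fits) << 7;
  return code_id + "_" + std::to_string(flags);
}

int main() {
  std::vector<int> shape{224, 224, 112, 112, 3, 3, 1, 8, 2, 2, 1, 1, 1, 1};
  std::string a = MakeConvCodeId(shape, true, false, true, true, false, true, false, true);
  std::string b = MakeConvCodeId(shape, true, false, true, true, false, true, false, true);
  std::cout << a << "\n" << (a == b) << "\n";  // identical configs -> identical key
}
```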

@@ -83,16 +83,16 @@ int OpenCLRuntime::Init() {
   MS_LOG(INFO) << "CL_HPP_MINIMUM_OPENCL_VERSION " << CL_HPP_MINIMUM_OPENCL_VERSION;
 #ifdef USE_OPENCL_WRAPPER
-  if (OpenCLWrapper::GetInstance()->LoadOpenCLLibrary() == false) {
+  if (lite::opencl::LoadOpenCLLibrary(handle_) == false) {
     MS_LOG(ERROR) << "Load OpenCL symbols failed!";
     return RET_ERROR;
   }
 #endif  // USE_OPENCL_WRAPPER
   std::vector<cl::Platform> platforms;
-  cl::Platform::get(&platforms);
+  cl_int ret = cl::Platform::get(&platforms);
   if (platforms.size() == 0) {
-    MS_LOG(ERROR) << "OpenCL Platform not found!";
+    MS_LOG(ERROR) << "OpenCL Platform not found!" << CLErrorCode(ret);
     return RET_ERROR;
   }
@@ -137,7 +137,7 @@ int OpenCLRuntime::Init() {
                 << max_work_item_sizes_[2];
   gpu_info_ = ParseGpuInfo(device_name, device_version);
-  cl_int ret;
+  // cl_int ret;
 #if defined(SHARING_MEM_WITH_OPENGL) && (CL_HPP_TARGET_OPENCL_VERSION >= 120)
   // create context from glcontext
   MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
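Two related cleanups above: the status returned by cl::Platform::get() is now captured so the failure log can include the decoded OpenCL error, and the later local `cl_int ret;` declaration is commented out because `ret` is now introduced earlier in the same function. A self-contained sketch of the error-decoding pattern (the CL types and CLErrorCode here are stand-ins so the sketch builds without OpenCL headers):

```cpp
#include <iostream>
#include <string>
#include <vector>

using cl_int = int;  // stand-in typedef; the real code uses the CL headers

// Stand-in for the CLErrorCode() helper: map a status code to readable text.
std::string CLErrorCode(cl_int code) {
  switch (code) {
    case 0:     return " CL_SUCCESS";
    case -1001: return " CL_PLATFORM_NOT_FOUND_KHR";  // typical "no ICD" status
    default:    return " unknown CL error " + std::to_string(code);
  }
}

// Stand-in for cl::Platform::get(): reports "no platform" via its return code.
cl_int GetPlatforms(std::vector<int> *platforms) { return -1001; }

int main() {
  std::vector<int> platforms;
  cl_int ret = GetPlatforms(&platforms);  // keep the status instead of discarding it
  if (platforms.empty()) {
    std::cerr << "OpenCL Platform not found!" << CLErrorCode(ret) << "\n";
    return 1;
  }
  return 0;
}
```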
@@ -235,7 +235,8 @@ int OpenCLRuntime::Uninit() {
   context_ = nullptr;
   device_ = nullptr;
 #ifdef USE_OPENCL_WRAPPER
-  OpenCLWrapper::GetInstance()->UnLoadOpenCLLibrary();
+  lite::opencl::UnLoadOpenCLLibrary(handle_);
+  handle_ = nullptr;
 #endif
   init_done_ = false;
   return RET_OK;
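Here the OpenCLWrapper singleton, which owned the dynamic-library handle globally, is replaced by free functions that take an explicit handle owned by the runtime instance (the new handle_ member in the next hunk), so each runtime loads and releases exactly the library it opened. A hedged sketch of such a load/unload pair, assuming POSIX dlopen and a by-reference handle; the real LoadOpenCLLibrary also resolves every cl* symbol, which is elided here:

```cpp
#include <dlfcn.h>
#include <iostream>

// Sketch of an explicit, caller-owned load/unload pair (assumed API shape).
bool LoadOpenCLLibrary(void *&handle) {
  if (handle != nullptr) return true;  // already loaded by this owner
  handle = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL);
  return handle != nullptr;
}

bool UnLoadOpenCLLibrary(void *&handle) {
  if (handle == nullptr) return true;
  bool ok = (dlclose(handle) == 0);  // release the library on Uninit()
  handle = nullptr;                  // avoid double-close on repeated calls
  return ok;
}

int main() {
  void *handle = nullptr;
  if (!LoadOpenCLLibrary(handle)) {
    std::cerr << "Load OpenCL symbols failed!\n";
    return 1;
  }
  UnLoadOpenCLLibrary(handle);
  return 0;
}
```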

@@ -166,6 +166,7 @@ class OpenCLRuntime {
   cl_device_svm_capabilities svm_capabilities_{0};
   cl_uint image_pitch_align_{0};
   std::vector<size_t> max_work_item_sizes_;
+  void *handle_{nullptr};
 };
 }  // namespace mindspore::lite::opencl

File diff suppressed because it is too large

File diff suppressed because it is too large