!6805 fix opencl mem leak

Merge pull request !6805 from wandongdong/r1.0
pull/6805/MERGE
mindspore-ci-bot committed by Gitee · 5 years ago
commit 6caf53008a

@@ -133,11 +133,10 @@ int ActivationOpenClKernel::GetImageSize(size_t idx, std::vector<size_t> *img_si
   return RET_OK;
 }
-kernel::LiteKernel *OpenClActivationFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                                      const std::vector<lite::Tensor *> &outputs,
-                                                      OpParameter *opParameter, const lite::InnerContext *ctx,
-                                                      const kernel::KernelKey &desc,
-                                                      const mindspore::lite::PrimitiveC *primitive) {
+kernel::LiteKernel *OpenClActivationKernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
+                                                  const lite::InnerContext *ctx, const kernel::KernelKey &desc,
+                                                  const mindspore::lite::PrimitiveC *primitive) {
   if (inputs.empty()) {
     MS_LOG(ERROR) << "Input data size must be greater than 0, but your size is " << inputs.size();
     return nullptr;
@@ -160,5 +159,6 @@ kernel::LiteKernel *OpenClActivationFp32KernelCreator(const std::vector<lite::Te
   }
   return kernel;
 }
-REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Activation, OpenClActivationFp32KernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Activation, OpenClActivationKernelCreator)
+REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Activation, OpenClActivationKernelCreator)
 }  // namespace mindspore::kernel
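Note on this hunk: the creator drops the Fp32 suffix and is registered for both kNumberTypeFloat16 and kNumberTypeFloat32, because the kernel resolves its precision at Init() from runtime state rather than from the registry key. A standalone sketch of that pattern (all names below are hypothetical stand-ins, not MindSpore's API):

```cpp
#include <functional>
#include <map>
#include <memory>

// One factory backs two registry keys; precision is decided at Init()
// from runtime state, mirroring ocl_runtime_->GetFp16Enable().
struct ActivationKernel {
  bool use_fp16 = false;
  int Init(bool fp16_enabled) {
    use_fp16 = fp16_enabled;  // precision chosen here, not at registration
    return 0;
  }
};

enum DataType { kFloat16, kFloat32 };
using KernelFactory = std::function<std::unique_ptr<ActivationKernel>()>;

int main() {
  std::map<DataType, KernelFactory> registry;
  KernelFactory shared_creator = [] { return std::make_unique<ActivationKernel>(); };
  registry[kFloat16] = shared_creator;  // mirrors the two REG_KERNEL lines
  registry[kFloat32] = shared_creator;
  auto kernel = registry[kFloat16]();
  return kernel->Init(/*fp16_enabled=*/true);
}
```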

@@ -38,11 +38,10 @@ constexpr size_t CI_TILE = C4NUM;
 constexpr size_t CO_TILE = C4NUM;
 int ConvolutionOpenCLKernel::Init() {
-  static int init_count = 0;
+  unsigned int init_count = 0;
   auto allocator = ocl_runtime_->GetAllocator();
   auto param = reinterpret_cast<ConvParameter *>(op_parameter_);
   std::set<std::string> build_options;
-  init_count++;
   use_fp16_ = ocl_runtime_->GetFp16Enable();
   if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) {
@@ -70,23 +69,48 @@ int ConvolutionOpenCLKernel::Init() {
   TILES_XY_ = TILES_X_ * TILES_Y_;
   use_winograd_ = UseWinograd4x4To6x6();
+  std::vector<int> vpara{IH_, IW_, OH_, OW_, KH_, KW_, CI_SLICES_, CO_SLICES_,
+                         param->stride_h_,
+                         param->stride_w_,
+                         param->pad_u_,
+                         param->pad_l_,
+                         param->pad_d_,
+                         param->pad_r_};
+  std::string code_id;
+  for (auto &iv : vpara) {
+    code_id += "_" + std::to_string(iv);
+  }
+  // gen code id: bit(OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE, format(nhwc4, nc4hw4), act_type(relu6, relu),
+  // padTop||padBottom, check_ow, fp16)
+  init_count |= ((unsigned int)use_fp16_);
+  init_count |= ((unsigned int)(OW_ % 2 == 1)) << 1;
+  init_count |= ((unsigned int)(param->pad_u_ || param->pad_d_)) << 2;
+  init_count |= ((unsigned int)(param->act_type_ == ActType_Relu)) << 3;
+  init_count |= ((unsigned int)(param->act_type_ == ActType_Relu6)) << 4;
+  init_count |= ((unsigned int)(op_format_ == schema::Format_NHWC4)) << 5;
+  init_count |= ((unsigned int)(op_format_ == schema::Format_NC4HW4)) << 6;
+  init_count |= ((unsigned int)(OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE)) << 7;
+  code_id += "_" + std::to_string(init_count);
   // build kernel
   if (use_winograd_) {
     MS_LOG(DEBUG) << "use winograd";
     std::string program_name;
-    program_name = "Winograd4x4To36" + std::to_string(init_count);
+    program_name = "Winograd4x4To36" + code_id;
     ocl_runtime_->LoadSource(program_name, CodeGenWinograd4x4To36());
     ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options);
-    program_name = "WinogradConvolution" + std::to_string(init_count);
+    program_name = "WinogradConvolution" + code_id;
     ocl_runtime_->LoadSource(program_name, CodeGenWinogradConvolution());
     ocl_runtime_->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options);
-    program_name = "Winograd36To4x4" + std::to_string(init_count);
+    program_name = "Winograd36To4x4" + code_id;
     ocl_runtime_->LoadSource(program_name, CodeGenWinograd36To4x4());
     ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options);
   } else {
-    std::string program_name = "convolution" + std::to_string(init_count);
+    std::string program_name = "convolution" + code_id;
     std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4();
     ocl_runtime_->LoadSource(program_name, source);
     ocl_runtime_->BuildKernel(kernel_conv_, program_name, "Convolution", build_options);
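This hunk is the heart of the fix. Previously program_name was suffixed with a static init_count that incremented on every Init(), so every convolution kernel instance got a uniquely named program: identical configurations could never share a compiled program, and each build stayed resident in the runtime's program cache (the leak). The new code derives a deterministic code_id from the shape parameters plus a bitmask of build options, so LoadSource()/BuildKernel() can hit the cache. A minimal standalone sketch of the key construction (flag order follows the comment in the hunk; parameter names here are stand-ins):

```cpp
#include <iostream>
#include <string>
#include <vector>

// Standalone sketch: shape parameters are appended as "_<value>" and the
// boolean build options are packed into the bits of one unsigned int, so
// two convolutions with identical configuration produce the same program
// name and can share one compiled OpenCL program.
std::string MakeConvCodeId(const std::vector<int> &shape_params, bool use_fp16,
                           bool ow_odd, bool pad_top_or_bottom, bool relu,
                           bool relu6, bool nhwc4, bool nc4hw4, bool ow_fits) {
  std::string code_id;
  for (int v : shape_params) code_id += "_" + std::to_string(v);
  unsigned int flags = 0;
  flags |= static_cast<unsigned int>(use_fp16);
  flags |= static_cast<unsigned int>(ow_odd) << 1;
  flags |= static_cast<unsigned int>(pad_top_or_bottom) << 2;
  flags |= static_cast<unsigned int>(relu) << 3;
  flags |= static_cast<unsigned int>(relu6) << 4;
  flags |= static_cast<unsigned int>(nhwc4) << 5;
  flags |= static_cast<unsigned int>(nc4hw4) << 6;
  flags |= static_cast<unsigned int>(ow_fits) << 7;
  return code_id + "_" + std::to_string(flags);
}

int main() {
  std::vector<int> shape{224, 224, 112, 112, 3, 3, 1, 8, 2, 2, 1, 1, 1, 1};
  std::string a = MakeConvCodeId(shape, true, false, true, true, false, true, false, true);
  std::string b = MakeConvCodeId(shape, true, false, true, true, false, true, false, true);
  std::cout << a << "\n" << (a == b) << "\n";  // identical configs -> identical key
}
```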

@@ -83,16 +83,16 @@ int OpenCLRuntime::Init() {
   MS_LOG(INFO) << "CL_HPP_MINIMUM_OPENCL_VERSION " << CL_HPP_MINIMUM_OPENCL_VERSION;
 #ifdef USE_OPENCL_WRAPPER
-  if (OpenCLWrapper::GetInstance()->LoadOpenCLLibrary() == false) {
+  if (lite::opencl::LoadOpenCLLibrary(handle_) == false) {
     MS_LOG(ERROR) << "Load OpenCL symbols failed!";
     return RET_ERROR;
   }
 #endif  // USE_OPENCL_WRAPPER
   std::vector<cl::Platform> platforms;
-  cl::Platform::get(&platforms);
+  cl_int ret = cl::Platform::get(&platforms);
   if (platforms.size() == 0) {
-    MS_LOG(ERROR) << "OpenCL Platform not found!";
+    MS_LOG(ERROR) << "OpenCL Platform not found!" << CLErrorCode(ret);
     return RET_ERROR;
   }
@@ -137,7 +137,7 @@ int OpenCLRuntime::Init() {
                 << max_work_item_sizes_[2];
   gpu_info_ = ParseGpuInfo(device_name, device_version);
-  cl_int ret;
+  // cl_int ret;
 #if defined(SHARING_MEM_WITH_OPENGL) && (CL_HPP_TARGET_OPENCL_VERSION >= 120)
   // create context from glcontext
   MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
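Two related cleanups above: the status returned by cl::Platform::get() is now captured so the failure log can include the decoded OpenCL error, and the later local `cl_int ret;` declaration is commented out because `ret` is now introduced earlier in the same function. A self-contained sketch of the error-decoding pattern (the CL types and CLErrorCode here are stand-ins so the sketch builds without OpenCL headers):

```cpp
#include <iostream>
#include <string>
#include <vector>

using cl_int = int;  // stand-in typedef; the real code uses the CL headers

// Stand-in for the CLErrorCode() helper: map a status code to readable text.
std::string CLErrorCode(cl_int code) {
  switch (code) {
    case 0:     return " CL_SUCCESS";
    case -1001: return " CL_PLATFORM_NOT_FOUND_KHR";  // typical "no ICD" status
    default:    return " unknown CL error " + std::to_string(code);
  }
}

// Stand-in for cl::Platform::get(): reports "no platform" via its return code.
cl_int GetPlatforms(std::vector<int> *platforms) { return -1001; }

int main() {
  std::vector<int> platforms;
  cl_int ret = GetPlatforms(&platforms);  // keep the status instead of discarding it
  if (platforms.empty()) {
    std::cerr << "OpenCL Platform not found!" << CLErrorCode(ret) << "\n";
    return 1;
  }
  return 0;
}
```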
@@ -235,7 +235,8 @@ int OpenCLRuntime::Uninit() {
   context_ = nullptr;
   device_ = nullptr;
 #ifdef USE_OPENCL_WRAPPER
-  OpenCLWrapper::GetInstance()->UnLoadOpenCLLibrary();
+  lite::opencl::UnLoadOpenCLLibrary(handle_);
+  handle_ = nullptr;
 #endif
   init_done_ = false;
   return RET_OK;
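Here the OpenCLWrapper singleton, which owned the dynamic-library handle globally, is replaced by free functions that take an explicit handle owned by the runtime instance (the new handle_ member in the next hunk), so each runtime loads and releases exactly the library it opened. A hedged sketch of such a load/unload pair, assuming POSIX dlopen and a by-reference handle; the real LoadOpenCLLibrary also resolves every cl* symbol, which is elided here:

```cpp
#include <dlfcn.h>
#include <iostream>

// Sketch of an explicit, caller-owned load/unload pair (assumed API shape).
bool LoadOpenCLLibrary(void *&handle) {
  if (handle != nullptr) return true;  // already loaded by this owner
  handle = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL);
  return handle != nullptr;
}

bool UnLoadOpenCLLibrary(void *&handle) {
  if (handle == nullptr) return true;
  bool ok = (dlclose(handle) == 0);  // release the library on Uninit()
  handle = nullptr;                  // avoid double-close on repeated calls
  return ok;
}

int main() {
  void *handle = nullptr;
  if (!LoadOpenCLLibrary(handle)) {
    std::cerr << "Load OpenCL symbols failed!\n";
    return 1;
  }
  UnLoadOpenCLLibrary(handle);
  return 0;
}
```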

@@ -166,6 +166,7 @@ class OpenCLRuntime {
   cl_device_svm_capabilities svm_capabilities_{0};
   cl_uint image_pitch_align_{0};
   std::vector<size_t> max_work_item_sizes_;
+  void *handle_{nullptr};
 };
 }  // namespace mindspore::lite::opencl

File diff suppressed because it is too large

File diff suppressed because it is too large