diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d_transpose.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d_transpose.cl index 22bd59396a..aec35e711d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d_transpose.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d_transpose.cl @@ -44,13 +44,13 @@ __kernel void conv2d_transpose(__read_only image2d_t src_data, __write_only imag for (int ci = 0; ci < src_size.z; ++ci) { FLT4 x0 = (FLT4)0.f; FLT4 x2 = (FLT4)0.f; - if (out0_src_h < src_size.x) { + if (out0_src_h < src_size.x && out0_src_h >= 0) { x0 = READ_IMAGE(src_data, smp_zero, (int2)(out0_src_w * src_size.z + ci, n * src_size.x + out0_src_h)); x2 = READ_IMAGE(src_data, smp_zero, (int2)(out1_src_w * src_size.z + ci, n * src_size.x + out0_src_h)); } FLT4 x1 = (FLT4)0.f; FLT4 x3 = (FLT4)0.f; - if (out1_src_h < src_size.x) { + if (out1_src_h < src_size.x && out1_src_h >= 0) { x1 = READ_IMAGE(src_data, smp_zero, (int2)(out0_src_w * src_size.z + ci, n * src_size.x + out1_src_h)); x3 = READ_IMAGE(src_data, smp_zero, (int2)(out1_src_w * src_size.z + ci, n * src_size.x + out1_src_h)); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc index 7347710b82..779322c179 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc @@ -245,11 +245,6 @@ void ConvertFilter(void *src, void *dst, TypeId src_dtype, TypeId dst_dtype, Fil void Conv2DOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); - auto ret = DequantWeight(); - if (ret != RET_OK) { - return; - } - // allocate opencl memory: buffer or image2d size_t size = 0; int Ogroup = block_size_.C; @@ -285,7 +280,6 @@ void Conv2DOpenCLKernel::InitFilter() { allocator->UnmapBuffer(packed_filter_); } - FreeDequantedWeight(); FreeTmpWeight(in_tensors_.at(kWeightIndex)->data_c()); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index 6d367d744a..2a2202cb17 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -132,10 +132,6 @@ int Conv2dTransposeOpenCLKernel::InitWeights() { } int Conv2dTransposeOpenCLKernel::InitFilter() { - auto ret = DequantWeight(); - if (ret != RET_OK) { - return ret; - } auto *param = reinterpret_cast(op_parameter_); int ci = in_tensors_[0]->shape()[3]; int co = out_tensors_[0]->shape()[3]; @@ -191,7 +187,6 @@ int Conv2dTransposeOpenCLKernel::InitFilter() { } } allocator->UnmapBuffer(padWeight_); - FreeDequantedWeight(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index f15de87d94..5c04e75c2d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -102,10 +102,6 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { } int DepthwiseConv2dOpenCLKernel::InitWeights() { - auto ret = DequantWeight(); - if (ret != RET_OK) { - return ret; - } auto parameter = reinterpret_cast(op_parameter_); auto allocator = ocl_runtime_->GetAllocator(); bool is_fp16 = ocl_runtime_->GetFp16Enable(); @@ -162,7 +158,6 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() { } else { packed_weight_ = allocator->Malloc(pack_weight_size, temp_filter.data()); } - FreeDequantedWeight(); if (packed_weight_ == nullptr) { return RET_ERROR; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc index 2429d30693..62179c70ca 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc @@ -58,18 +58,6 @@ int FullConnectionOpenCLKernel::CheckSpecs() { MS_LOG(ERROR) << "Unsupported activation type " << param->act_type_; return RET_ERROR; } - // for fusion: ActivationType_TANH - switch (static_cast(param->act_type_)) { - case ActType_No: - case ActType_Relu: - case ActType_Relu6: - case ActivationType_TANH: - break; - default: { - MS_LOG(ERROR) << "Unsupported activation type " << param->act_type_; - return RET_ERROR; - } - } N_ = out_gpu_info.N; CO_ = out_gpu_info.C; auto intensor_shape = GpuTensorInfo(in_tensors_[0]); @@ -140,10 +128,6 @@ int FullConnectionOpenCLKernel::InitWeights() { } // namespace mindspore::kernel int FullConnectionOpenCLKernel::InitFilter() { - auto ret = DequantWeight(); - if (ret != RET_OK) { - return ret; - } auto allocator = ocl_runtime_->GetAllocator(); auto intensor_shape = GpuTensorInfo(in_tensors_[0]); int co4 = UP_DIV(CO_, C4NUM); @@ -196,7 +180,6 @@ int FullConnectionOpenCLKernel::InitFilter() { } } allocator->UnmapBuffer(padWeight_); - FreeDequantedWeight(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index 94f78de470..b496ba3ddf 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -106,10 +106,6 @@ int MatMulOpenCLKernel::InitWeights() { return RET_OK; } // ABMCI @ ABCICO = ABMCO - auto ret = DequantWeight(); - if (ret != RET_OK) { - return ret; - } auto allocator = ocl_runtime_->GetAllocator(); auto weight_shape = in_tensors_[1]->shape(); int weight_ndim = weight_shape.size(); @@ -184,7 +180,6 @@ int MatMulOpenCLKernel::InitWeights() { } allocator->UnmapBuffer(padWeight_); - FreeDequantedWeight(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc index 0026fa72dc..93ed464e92 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc @@ -69,10 +69,6 @@ void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { int StrassenOpenCLKernel::InitWeights() { // ABMCI @ ABCICO = ABMCO - auto ret = DequantWeight(); - if (ret != RET_OK) { - return ret; - } auto allocator = ocl_runtime_->GetAllocator(); int NumA = in_tensors_[0]->shape()[0]; int NumB = in_tensors_[1]->shape()[0]; @@ -105,7 +101,6 @@ int StrassenOpenCLKernel::InitWeights() { } } allocator->UnmapBuffer(padWeight_); - FreeDequantedWeight(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc index 86dacd0086..9f9ef01007 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc @@ -91,11 +91,6 @@ void WinogradOpenCLKernel::BuildKernel() { void WinogradOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); - auto ret = DequantWeight(); - if (ret != RET_OK) { - return; - } - // allocate opencl memory: buffer or image2d size_t size = 0; int Ogroup = 2; @@ -140,8 +135,6 @@ void WinogradOpenCLKernel::InitFilter() { memcpy(packed_filter_, tmp.data(), size); allocator->UnmapBuffer(packed_filter_); } - - FreeDequantedWeight(); } void WinogradOpenCLKernel::AllocateMemory() { diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc index feb461bf7b..601afb9797 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc @@ -351,57 +351,6 @@ std::set OpenCLKernel::GenerateLocalByGlobal(size_t global_i) { return local_; } -int OpenCLKernel::DequantWeight() { - bool is_fp16 = ocl_runtime_->GetFp16Enable(); - auto *weight_tensor = in_tensors_.at(kWeightIndex); - restore_quant_data_ = weight_tensor->data_c(); - dequant_flag_ = !weight_tensor->quant_params().empty() && weight_tensor->quant_params().front().inited && - restore_quant_data_ != nullptr; - if (dequant_flag_) { - void *dequant_weight{nullptr}; - bool set_flag{true}; - if (is_fp16) { -#ifdef ENABLE_ARM64 - if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeInt8) { - dequant_weight = lite::DequantUtil::DequantData(weight_tensor); - weight_tensor->set_data_type(kNumberTypeFloat16); - } else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeInt16) { - dequant_weight = lite::DequantUtil::DequantData(weight_tensor); - weight_tensor->set_data_type(kNumberTypeFloat16); - } else { - set_flag = false; - } -#else - set_flag = false; -#endif - } else { - if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeInt8) { - dequant_weight = lite::DequantUtil::DequantData(weight_tensor); - weight_tensor->set_data_type(kNumberTypeFloat32); - } else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeInt16) { - dequant_weight = lite::DequantUtil::DequantData(weight_tensor); - weight_tensor->set_data_type(kNumberTypeFloat32); - } else { - set_flag = false; - } - } - if (set_flag && dequant_weight == nullptr) { - MS_LOG(ERROR) << "dequant data failed."; - return RET_ERROR; - } - weight_tensor->set_data(dequant_weight); - } - return RET_OK; -} - -void OpenCLKernel::FreeDequantedWeight() { - auto *weight_tensor = in_tensors_.at(kWeightIndex); - if (dequant_flag_) { - free(weight_tensor->data_c()); - weight_tensor->set_data(restore_quant_data_); - } -} - int OpenCLKernel::CheckSpecs() { if (out_mem_type_ == lite::opencl::MemType::IMG) { if (!GpuTensorInfo(out_tensors_[0]).IsImageSizeValid()) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index f4d9192cac..9d245025bb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -202,8 +202,6 @@ class OpenCLKernel : public LiteKernel { void SetMemType(lite::opencl::MemType mem_type) { out_mem_type_ = mem_type; } OpParameter *GetParameter() { return op_parameter_; } virtual double GetProfilingTimeMs(); - int DequantWeight(); - void FreeDequantedWeight(); virtual int InferShape(); protected: diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index 835bc4ba50..2ec491ac8b 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -345,9 +345,22 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in kernel::KernelKey gpu_desc{kGPU, kNumberTypeFloat32, desc.type}; if (context_->IsGpuFloat16Enabled()) gpu_desc.data_type = kNumberTypeFloat16; if (in_tensors.front()->data_type() == kNumberTypeInt8) gpu_desc.data_type = kNumberTypeInt8; + + // weight quant + std::map restored_origin_tensors; + for (auto &tensor : in_tensors) { + int index = 0; + auto channel_first = IsChannelFirst(index++, op_parameter); + auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kNumberTypeFloat32); + if (restore_tensor != nullptr) { + restored_origin_tensors[tensor] = restore_tensor; + } + } + auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, gpu_desc, op_parameter); if (kernel != nullptr) { MS_LOG(DEBUG) << "Get gpu op success: " << PrimitiveCurVersionTypeName(gpu_desc.type) << " " << node->name_; + FreeRestoreTensors(&restored_origin_tensors); return kernel; } else { MS_LOG(DEBUG) << "Get gpu op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(gpu_desc.type) << " " @@ -356,6 +369,7 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in if (ret == RET_INFER_INVALID || ret == RET_OK) { op_parameter = op_parameters_[node->output_indices_.at(0)]; } else { + RestoreTensorData(restored_origin_tensors); MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; return nullptr; } diff --git a/mindspore/lite/test/models_gpu_weightquant.cfg b/mindspore/lite/test/models_gpu_weightquant.cfg index 54ae30da47..dfc9d8397a 100644 --- a/mindspore/lite/test/models_gpu_weightquant.cfg +++ b/mindspore/lite/test/models_gpu_weightquant.cfg @@ -1 +1 @@ -ml_face_openclose.tflite \ No newline at end of file +ml_face_openclose.tflite