diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/transpose.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/transpose.cl index 847ebb98b7..66ffe8efd1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/transpose.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/transpose.cl @@ -123,7 +123,7 @@ __kernel void transpose_0231_NHWC4(__read_only image2d_t src_data, __write_only if (X >= shape.y || 4 * Y >= shape.z || 4 * Z >= shape.w) { return; } - int W4 = UP_DIV(shape.y, 4); + int W4 = UP_DIV(shape.z, 4); int C4 = UP_DIV(shape.w, 4); FLT4 src0 = READ_IMAGE(src_data, smp_zero, (int2)(X * W4 + Y, 4 * Z)); FLT4 src1 = (FLT4)0.f; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index a6f05a0490..e01db37085 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -61,12 +61,12 @@ int ActivationOpenClKernel::Init() { MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_; return mindspore::lite::RET_ERROR; } - std::map Program_Kernel{{ActivationType_LEAKY_RELU, "LeakyRelu"}, - {ActivationType_RELU, "Relu"}, - {ActivationType_SIGMOID, "Sigmoid"}, - {ActivationType_RELU6, "Relu6"}, - {ActivationType_TANH, "Tanh"}}; - if (Program_Kernel.count(type_) == 0) { + std::map kernel_names{{ActivationType_LEAKY_RELU, "LeakyRelu"}, + {ActivationType_RELU, "Relu"}, + {ActivationType_SIGMOID, "Sigmoid"}, + {ActivationType_RELU6, "Relu6"}, + {ActivationType_TANH, "Tanh"}}; + if (kernel_names.count(type_) == 0) { MS_LOG(ERROR) << "schema::ActivationType:" << type_ << "not found"; return mindspore::lite::RET_ERROR; } @@ -75,12 +75,8 @@ int ActivationOpenClKernel::Init() { std::set build_options; std::string program_name = "Activation"; ocl_runtime_->LoadSource(program_name, source); - std::string kernel_name = Program_Kernel[type_]; + std::string kernel_name = 
kernel_names[type_]; ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << op_parameter_->name_ << " init Done!"; return mindspore::lite::RET_OK; } @@ -107,32 +103,12 @@ int ActivationOpenClKernel::Run() { cl_int4 ActivationOpenClKernel::GetImg2dShape() { cl_int4 img2d_shape = {1, 1, 1, 1}; - if (op_format_ == schema::Format_NHWC4) { - img2d_shape.s[1] = nhwc_shape_[1]; - img2d_shape.s[2] = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM); - img2d_shape.s[3] = C4NUM; - } - if (op_format_ == schema::Format_NC4HW4) { - img2d_shape.s[1] = UP_DIV(nhwc_shape_[3], C4NUM) * nhwc_shape_[1]; - img2d_shape.s[2] = nhwc_shape_[2]; - img2d_shape.s[3] = C4NUM; - } + img2d_shape.s[1] = nhwc_shape_[1]; + img2d_shape.s[2] = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM); + img2d_shape.s[3] = C4NUM; return img2d_shape; } -int ActivationOpenClKernel::GetImageSize(size_t idx, std::vector *img_size) { - cl_int4 img_shape = GetImg2dShape(); - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - img_size->push_back(img_shape.s[2]); - img_size->push_back(img_shape.s[1]); - img_size->push_back(img_dtype); - return mindspore::lite::RET_OK; -} - kernel::LiteKernel *OpenClActivationKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h index 6cc25edb6d..f2b44af5fb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h @@ -26,27 +26,25 @@ namespace mindspore::kernel { class ActivationOpenClKernel : public 
OpenCLKernel { public: - explicit ActivationOpenClKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) - : OpenCLKernel(parameter, inputs, outputs) { - type_ = (reinterpret_cast(parameter))->type_; - alpha_ = (reinterpret_cast(parameter))->alpha_; - } - ~ActivationOpenClKernel() override{}; + ActivationOpenClKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) + : OpenCLKernel(parameter, inputs, outputs), + type_(reinterpret_cast(parameter)->type_), + alpha_(reinterpret_cast(parameter)->alpha_) {} + ~ActivationOpenClKernel() override = default; int Init() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - cl_int4 GetImg2dShape(); - void InitBuffer() {} private: + cl_int4 GetImg2dShape(); + cl::Kernel kernel_; int type_; float alpha_; - int in_size_; - int out_size_; - size_t fp_size; + int in_size_{}; + int out_size_{}; + size_t fp_size{}; bool enable_fp16_{false}; std::vector nhwc_shape_; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index fc8cfb4def..cfe68b7916 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -34,79 +34,35 @@ using mindspore::schema::PrimitiveType_Eltwise; namespace mindspore::kernel { -ArithmeticOpenCLKernel::~ArithmeticOpenCLKernel() {} - std::vector ArithmeticOpenCLKernel::InitGlobalSize() const { - const size_t global_x = out_tensors_[0]->Width(); - const size_t global_y = out_tensors_[0]->Height(); - const size_t global_z = UP_ROUND_DIV(out_tensors_[0]->Channel(), 4); - std::vector global = {global_x, global_y, global_z}; - return global; + auto out_shape = out_tensors_[0]->shape(); + if (out_shape.size() == 2) { + const size_t global_x = 1; + const size_t global_y = 1; + const size_t global_z = UP_ROUND_DIV(out_shape[1], C4NUM); + 
std::vector global = {global_x, global_y, global_z}; + return global; + } else { + const size_t global_x = out_shape[2]; + const size_t global_y = out_shape[1]; + const size_t global_z = UP_ROUND_DIV(out_shape[3], C4NUM); + std::vector global = {global_x, global_y, global_z}; + return global; + } } void ArithmeticOpenCLKernel::Image2dGetWorkGroupSize() { local_size_ = {16, 16}; - if (out_tensors_[0]->shape().size() == 2) { - size_t H = out_tensors_[0]->shape()[0]; - size_t W = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); - global_size_ = {W, H}; - return; - } - if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { - size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t W = out_tensors_[0]->Width(); - global_size_ = {W, H}; - } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { - size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); - size_t W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); - global_size_ = {W, H}; - } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) { - size_t H = out_tensors_[0]->Batch(); - size_t W = UP_DIV(out_tensors_[0]->Channel(), C4NUM); + auto out_shape = out_tensors_[0]->shape(); + if (out_shape.size() == 2) { + size_t H = out_shape[0]; + size_t W = UP_DIV(out_shape[1], C4NUM); global_size_ = {W, H}; } else { - MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat(); - } -} - -void ArithmeticOpenCLKernel::BufferGetWorkGroupSize() { - uint32_t element_num = out_tensors_[0]->ElementsC4Num(); - global_size_ = {element_num}; -} - -int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - if (out_tensors_[0]->shape().size() == 2) { - im_dst_x = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); - im_dst_y = out_tensors_[0]->shape()[0]; - } else { - if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { - im_dst_x = out_tensors_[0]->Width(); - 
im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); - } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); - } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) { - im_dst_y = out_tensors_[0]->Batch(); - im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - } else { - MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat(); - return RET_ERROR; - } - } - - size_t img_dtype = CL_FLOAT; - if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { - img_dtype = CL_HALF_FLOAT; - } else if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { - img_dtype = CL_FLOAT; - } else { - MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[0]->data_type(); + size_t H = out_shape[0] * out_shape[1]; + size_t W = out_shape[2] * UP_DIV(out_shape[3], C4NUM); + global_size_ = {W, H}; } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return RET_OK; } int ArithmeticOpenCLKernel::InitBuffer() { @@ -119,7 +75,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { inputs_weight_ptrs_.push_back(nullptr); } else { auto allocator = ocl_runtime_->GetAllocator(); - std::vector img_size = GetImage2dShapeFromNHWC(nhwc_shape, op_format_); + std::vector img_size = GetImage2dShapeFromNHWC(nhwc_shape, schema::Format_NHWC4); int pack_weight_size = img_size[0] * img_size[1] * C4NUM; int plane = nhwc_shape[1] * nhwc_shape[2]; int channel = nhwc_shape[3]; @@ -132,22 +88,12 @@ int ArithmeticOpenCLKernel::InitBuffer() { return RET_ERROR; } memset(weight, 0x00, pack_weight_size * data_size); - if (op_format_ == schema::Format_NHWC4) { - if (in_tensor_->data_type() == kNumberTypeFloat32) { - std::function to_dtype = [](float x) -> float { return x; }; - PackNHWCToNHWC4(in_tensor_->data_c(), weight, batch, plane, channel, 
to_dtype); - } else if (in_tensor_->data_type() == kNumberTypeFloat16) { - std::function to_dtype = [](float16_t x) -> float { return static_cast(x); }; - PackNHWCToNHWC4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); - } - } else if (op_format_ == schema::Format_NC4HW4) { - if (in_tensor_->data_type() == kNumberTypeFloat32) { - std::function to_dtype = [](float x) -> float { return x; }; - PackNHWCToNC4HW4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); - } else if (in_tensor_->data_type() == kNumberTypeFloat16) { - std::function to_dtype = [](float16_t x) -> float { return static_cast(x); }; - PackNHWCToNC4HW4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); - } + if (in_tensor_->data_type() == kNumberTypeFloat32) { + std::function to_dtype = [](float x) -> float { return x; }; + PackNHWCToNHWC4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); + } else if (in_tensor_->data_type() == kNumberTypeFloat16) { + std::function to_dtype = [](float16_t x) -> float { return static_cast(x); }; + PackNHWCToNHWC4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); } if (batch * plane * channel == 1) { // scalar @@ -163,22 +109,12 @@ int ArithmeticOpenCLKernel::InitBuffer() { return RET_ERROR; } memset(weight, 0x00, pack_weight_size * data_size); - if (op_format_ == schema::Format_NHWC4) { - if (in_tensor_->data_type() == kNumberTypeFloat32) { - std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; - PackNHWCToNHWC4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); - } else if (in_tensor_->data_type() == kNumberTypeFloat16) { - std::function to_dtype = [](float16_t x) -> float16_t { return x; }; - PackNHWCToNHWC4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); - } - } else if (op_format_ == schema::Format_NC4HW4) { - if (in_tensor_->data_type() == kNumberTypeFloat32) { - std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; - 
PackNHWCToNC4HW4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); - } else if (in_tensor_->data_type() == kNumberTypeFloat16) { - std::function to_dtype = [](float16_t x) -> float16_t { return x; }; - PackNHWCToNC4HW4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); - } + if (in_tensor_->data_type() == kNumberTypeFloat32) { + std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; + PackNHWCToNHWC4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); + } else if (in_tensor_->data_type() == kNumberTypeFloat16) { + std::function to_dtype = [](float16_t x) -> float16_t { return x; }; + PackNHWCToNHWC4(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); } if (batch * plane * channel == 1) { // scalar @@ -195,18 +131,11 @@ int ArithmeticOpenCLKernel::InitBuffer() { int ArithmeticOpenCLKernel::Init() { std::string kernel_name; - - const ArithmeticParameter *arithmetic_parameter = reinterpret_cast(op_parameter_); + auto *arithmetic_parameter = reinterpret_cast(op_parameter_); if (arithmetic_parameter->broadcasting_) { element_flag_ = false; - if (op_format_ == schema::Format_NHWC4) { - kernel_name = "BroadcastNHWC4"; - } else { - kernel_name = "BroadcastNC4HW4"; - MS_LOG(ERROR) << "Don't support BroadcastNC4HW4 yet"; - return RET_ERROR; - } + kernel_name = "BroadcastNHWC4"; } else { kernel_name = "Element"; } @@ -302,17 +231,6 @@ int ArithmeticOpenCLKernel::Init() { return error_code; } - auto format = schema::Format::Format_NHWC4; - if (arithmetic_parameter->ndim_ == 2) { - format = schema::Format::Format_NC4; - } - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(format); - if (element_flag_ && in_tensors_[1]->category() != lite::Tensor::Category::CONST) { - in_tensors_[1]->SetFormat(format); - } - out_tensors_[0]->SetFormat(format); Image2dGetWorkGroupSize(); InitBuffer(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h index 2bd469d914..29142f0efb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h @@ -25,20 +25,18 @@ namespace mindspore::kernel { class ArithmeticOpenCLKernel : public OpenCLKernel { public: - explicit ArithmeticOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::InnerContext *ctx) + ArithmeticOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) : OpenCLKernel(parameter, inputs, outputs) {} - ~ArithmeticOpenCLKernel() override; + ~ArithmeticOpenCLKernel() override = default; int Init() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; + int InitBuffer() override; private: std::vector InitGlobalSize() const; void Image2dGetWorkGroupSize(); - void BufferGetWorkGroupSize(); - int InitBuffer(); cl::Kernel kernel_; bool element_flag_{true}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index c99cd2b9fd..e0ce56caa4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -41,38 +41,6 @@ using mindspore::schema::PrimitiveType_Square; namespace mindspore::kernel { -int ArithmeticSelfOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - auto out_shape = out_tensors_[0]->shape(); - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) { - if (in_tensors_[0]->shape().size() == 4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height() * 
out_tensors_[0]->Batch(); - } else { - im_dst_x = UP_DIV(out_shape[1], C4NUM); - im_dst_y = out_tensors_[0]->Batch(); - } - } else { - if (in_tensors_[0]->shape().size() == 4) { - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; - im_dst_x = out_tensors_[0]->Width(); - } else { - im_dst_y = out_tensors_[0]->Batch() * UP_DIV(out_shape[1], C4NUM); - im_dst_x = 1; - } - } - size_t img_dtype = CL_FLOAT; - auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return mindspore::lite::RET_OK; -} - void ArithmeticSelfOpenCLKernel::GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param) { switch (param->op_parameter_.type_) { case PrimitiveType_Abs: @@ -126,24 +94,9 @@ int ArithmeticSelfOpenCLKernel::Init() { } auto param = reinterpret_cast(this->op_parameter_); - auto in_format = op_format_; - if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4 && in_format != schema::Format_NC4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return mindspore::lite::RET_ERROR; - } - in_ori_format_ = in_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_ori_format_ = out_tensors_[0]->GetFormat(); - out_tensors_[0]->SetFormat(op_format_); - std::string kernel_name = "ArithmeticSelf"; GetKernelName(&kernel_name, param); - if (in_format == schema::Format_NC4HW4) { - kernel_name += "_NC4HW4"; - } else if (in_format == schema::Format_NHWC4) { - kernel_name += "_NHWC4"; - } + kernel_name += "_NHWC4"; MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; std::set build_options; std::string source = arithmeticself_source; @@ -154,8 +107,6 @@ int ArithmeticSelfOpenCLKernel::Init() { return mindspore::lite::RET_OK; } -int ArithmeticSelfOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } - void ArithmeticSelfGetWorkGroup(const 
std::vector &global, std::vector *local, int max_size) { const int max_divider = 8; const int max_x = 4, max_y = 8; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h index 9bdfcf27b5..be31cb97d8 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h @@ -26,23 +26,19 @@ namespace mindspore::kernel { class ArithmeticSelfOpenCLKernel : public OpenCLKernel { public: - explicit ArithmeticSelfOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + ArithmeticSelfOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~ArithmeticSelfOpenCLKernel() override{}; + ~ArithmeticSelfOpenCLKernel() override = default; int Init() override; - int ReSize() override; - int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - + private: void GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param); - private: cl::Kernel kernel_; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc index 3f23ddc328..754df35b31 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc @@ -32,18 +32,12 @@ using mindspore::schema::PrimitiveType_BatchToSpaceND; namespace mindspore::kernel { int BatchToSpaceNDOpenCLKernel::Init() { - std::string kernel_name = "batch_to_space_nd"; - auto in_format = op_format_; + std::string kernel_name = "batch_to_space_nd_NHWC4"; if (in_tensors_[0]->shape().size() != 4 && out_tensors_[0]->shape().size() != 4) { MS_LOG(ERROR) << "input/output shape size must be 4, actual: " << in_tensors_[0]->shape().size() << ", " << 
out_tensors_[0]->shape().size(); return RET_ERROR; } - if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return RET_ERROR; - } auto *param = reinterpret_cast(this->op_parameter_); if (param->block_shape_[0] < 1 || param->block_shape_[1] < 1) { MS_LOG(ERROR) << "block_sizes_ must > 1, actual " << param->block_shape_[0] << ", " << param->block_shape_[1]; @@ -55,18 +49,10 @@ int BatchToSpaceNDOpenCLKernel::Init() { return RET_ERROR; } - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - if (in_format == schema::Format_NC4HW4) { - kernel_name += "_NC4HW4"; - } else { - kernel_name += "_NHWC4"; - } + std::set build_options; std::string source = batch_to_space_nd_source; std::string program_name = "batch_to_space_nd"; @@ -76,28 +62,7 @@ int BatchToSpaceNDOpenCLKernel::Init() { MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int BatchToSpaceNDOpenCLKernel::InitBuffer() { return RET_OK; } -int BatchToSpaceNDOpenCLKernel::ReSize() { return RET_OK; } -int BatchToSpaceNDOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); - } else { - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; - im_dst_x = out_tensors_[0]->Width(); - } - size_t img_dtype = CL_FLOAT; - auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - 
*img_size = std::move(vec); - return RET_OK; -} + int BatchToSpaceNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h index 8e2c117370..f710c4d436 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h @@ -25,22 +25,16 @@ namespace mindspore::kernel { class BatchToSpaceNDOpenCLKernel : public OpenCLKernel { public: - explicit BatchToSpaceNDOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + BatchToSpaceNDOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~BatchToSpaceNDOpenCLKernel() override{}; + ~BatchToSpaceNDOpenCLKernel() override = default; int Init() override; - int ReSize() override; - int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - - int InitBuffer(); - private: cl::Kernel kernel_; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index 157891bbae..d131bf36a4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -30,45 +30,8 @@ using mindspore::schema::PrimitiveType_BatchNorm; namespace mindspore::kernel { -int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height(); - } else { - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() 
* CO4; - im_dst_x = out_tensors_[0]->Width(); - } - size_t img_dtype = CL_FLOAT; - auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return RET_OK; -} - int BatchNormOpenCLKernel::Init() { - auto in_format = op_format_; - if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return RET_ERROR; - } - in_ori_format_ = in_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_ori_format_ = out_tensors_[0]->GetFormat(); - out_tensors_[0]->SetFormat(op_format_); - std::string kernel_name = "Batch_normalization"; - if (in_format == schema::Format_NC4HW4) { - kernel_name += "_NC4HW4"; - } else if (in_format == schema::Format_NHWC4) { - kernel_name += "_NHWC4"; - } - + std::string kernel_name = "Batch_normalization_NHWC4"; std::set build_options; std::string source = batchnorm_source; std::string program_name = "Batch_normalization"; @@ -78,8 +41,6 @@ int BatchNormOpenCLKernel::Init() { return RET_OK; } -int BatchNormOpenCLKernel::ReSize() { return RET_OK; } - void BatchNormGetWorkGroup(const std::vector &global, std::vector *local, int max_size) { const int max_divider = 8; const int max_x = 4, max_y = 8; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h index 8f44b78f87..435962cc5c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h @@ -25,20 +25,16 @@ namespace mindspore::kernel { class BatchNormOpenCLKernel : public OpenCLKernel { public: - explicit BatchNormOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + BatchNormOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + 
const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~BatchNormOpenCLKernel() override{}; + ~BatchNormOpenCLKernel() override = default; int Init() override; - int ReSize() override; - int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - private: cl::Kernel kernel_; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc index 67ed3b19ea..d83eb6ebc4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc @@ -35,7 +35,7 @@ using mindspore::schema::PrimitiveType_BiasAdd; namespace mindspore::kernel { -void BiasAddOpenCLKernel::InitBuffer() { +int BiasAddOpenCLKernel::InitBuffer() { int C = in_tensors_[1]->shape()[0]; int div_ci = UP_DIV(C, C4NUM); auto allocator = ocl_runtime_->GetAllocator(); @@ -49,6 +49,7 @@ void BiasAddOpenCLKernel::InitBuffer() { memset(BiasAdd_, 0x00, div_ci * C4NUM * fp_size); memcpy(BiasAdd_, in_tensors_[1]->data_c(), C * fp_size); allocator->UnmapBuffer(BiasAdd_); + return RET_OK; } int BiasAddOpenCLKernel::Init() { @@ -77,10 +78,6 @@ int BiasAddOpenCLKernel::Init() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << program_name << " Init Done!"; return mindspore::lite::RET_OK; } @@ -95,7 +92,7 @@ int BiasAddOpenCLKernel::Run() { ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, BiasAdd_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 
data_type[schema::Format::Format_NHWC4]); std::vector local = {1, 1}; std::vector global = {static_cast(global_size.s[1]), static_cast(global_size.s[2])}; auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); @@ -108,32 +105,10 @@ int BiasAddOpenCLKernel::Run() { cl_int4 BiasAddOpenCLKernel::GetGlobalshape() { cl_int4 global_shape = input_shape_; - if (op_format_ == schema::Format::Format_NC4) { - global_shape.s[1] = global_shape.s[2]; - global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM); - } - if (op_format_ == schema::Format::Format_NC4HW4) { - global_shape.s[1] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[1]; // c / 4 * H - } - if (op_format_ == schema::Format::Format_NHWC4) { - global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[2]; - } + global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[2]; return global_shape; } -int BiasAddOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - cl_int4 img_shape = GetGlobalshape(); - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - img_size->push_back(img_shape.s[2]); - img_size->push_back(img_shape.s[1]); - img_size->push_back(img_dtype); - return mindspore::lite::RET_OK; -} - kernel::LiteKernel *OpenCLBiasAddKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h index 913cabe92d..625e69c0c6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h @@ -28,25 +28,25 @@ namespace mindspore::kernel { class BiasAddOpenCLKernel : public OpenCLKernel { public: - explicit BiasAddOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + 
BiasAddOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~BiasAddOpenCLKernel() override{}; + ~BiasAddOpenCLKernel() override = default; int Init() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - void InitBuffer(); - cl_int4 GetGlobalshape(); + int InitBuffer() override; private: + cl_int4 GetGlobalshape(); + cl::Kernel kernel_; - void *BiasAdd_; - int in_size_; - int out_size_; - size_t fp_size; - cl_int4 input_shape_; - bool enable_fp16_{false}; + void *BiasAdd_{nullptr}; + int in_size_{}; + int out_size_{}; + size_t fp_size{}; + cl_int4 input_shape_{}; + bool enable_fp16_{}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc index d09e1cd1c1..8a4304fd72 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc @@ -30,27 +30,6 @@ using mindspore::schema::PrimitiveType_Cast; namespace mindspore::kernel { -int CastOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height(); - } else { - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; - im_dst_x = out_tensors_[0]->Width(); - } - size_t img_dtype = CL_FLOAT; - auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return RET_OK; -} - int CastOpenCLKernel::GetKernelName(std::string *kernel_name, CastParameter *param) { if (param->src_type_ == kNumberTypeFloat32 && param->dst_type_ == 
kNumberTypeFloat16) { kernel_name[0] += "_Fp32ToFp16"; @@ -65,23 +44,9 @@ int CastOpenCLKernel::GetKernelName(std::string *kernel_name, CastParameter *par int CastOpenCLKernel::Init() { auto param = reinterpret_cast(this->op_parameter_); - auto in_format = op_format_; - if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return RET_ERROR; - } - in_ori_format_ = in_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_ori_format_ = out_tensors_[0]->GetFormat(); - out_tensors_[0]->SetFormat(op_format_); std::string kernel_name = "Cast"; GetKernelName(&kernel_name, param); - if (in_format == schema::Format_NC4HW4) { - kernel_name += "_NC4HW4"; - } else if (in_format == schema::Format_NHWC4) { - kernel_name += "_NHWC4"; - } + kernel_name += "_NHWC4"; std::set build_options; std::string source = cast_source; std::string program_name = "cast"; @@ -91,8 +56,6 @@ int CastOpenCLKernel::Init() { return RET_OK; } -int CastOpenCLKernel::ReSize() { return RET_OK; } - void CastGetWorkGroup(const std::vector &global, std::vector *local, int max_size) { const int max_divider = 8; const int max_x = 4, max_y = 8; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h index 1e536a93d2..32d0972817 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h @@ -26,23 +26,19 @@ namespace mindspore::kernel { class CastOpenCLKernel : public OpenCLKernel { public: - explicit CastOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + CastOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~CastOpenCLKernel() override{}; + ~CastOpenCLKernel() override = default; int Init() override; - int ReSize() 
override; - int Run() override; + private: int GetKernelName(std::string *kernel_name, CastParameter *param); - int GetImageSize(size_t idx, std::vector *img_size) override; - - private: cl::Kernel kernel_; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index 8c80d2d18a..ed8d65a714 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -31,27 +31,6 @@ using mindspore::schema::PrimitiveType_Concat; namespace mindspore::kernel { -int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); - } else { - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; - im_dst_x = out_tensors_[0]->Width(); - } - size_t img_dtype = CL_FLOAT; - auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return RET_OK; -} - int ConcatOpenCLKernel::RunAxis0() { auto allocator_ = ocl_runtime_->GetAllocator(); std::vector img_size; @@ -85,39 +64,15 @@ int ConcatOpenCLKernel::Init() { MS_LOG(ERROR) << " only support axis >= 0 and axis <= 3 "; return RET_ERROR; } - auto in_format = op_format_; - if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return RET_ERROR; - } - in_ori_format_ = in_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_ori_format_ = out_tensors_[0]->GetFormat(); - out_tensors_[0]->SetFormat(op_format_); std::string kernel_name = "Concat"; - 
if (in_tensors_.size() == 2) { - kernel_name += "2inputaxis"; - kernel_name += std::to_string(param->axis_); - } else if (in_tensors_.size() == 3) { - kernel_name += "3inputaxis"; - kernel_name += std::to_string(param->axis_); - } else if (in_tensors_.size() == 4) { - kernel_name += "4inputaxis"; - kernel_name += std::to_string(param->axis_); - } else if (in_tensors_.size() == 6) { - kernel_name += "6inputaxis"; - kernel_name += std::to_string(param->axis_); + if (in_tensors_.size() == 2 || in_tensors_.size() == 3 || in_tensors_.size() == 4 || in_tensors_.size() == 6) { + kernel_name += std::to_string(in_tensors_.size()) + "inputaxis" + std::to_string(param->axis_); } else { MS_LOG(ERROR) << " input must be 2 , 3 , 4 or 6"; return RET_ERROR; } - if (in_format == schema::Format_NC4HW4) { - kernel_name += "_NC4HW4"; - } else if (in_format == schema::Format_NHWC4) { - kernel_name += "_NHWC4"; - } + kernel_name += "_NHWC4"; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::set build_options; std::string source = concat_source; @@ -128,16 +83,13 @@ int ConcatOpenCLKernel::Init() { return RET_OK; } -int ConcatOpenCLKernel::ReSize() { return RET_OK; } - int ConcatOpenCLKernel::IntegraShapeToXYZ() { - auto in_format = op_format_; - if (out_tensors_[0]->shape().size() > 4 || out_tensors_[0]->shape().size() <= 0) { + if (out_tensors_[0]->shape().size() > 4 || out_tensors_[0]->shape().empty()) { MS_LOG(ERROR) << "in_tensors_.shape() must between 0~4"; return RET_ERROR; } - if (in_format == schema::Format_NHWC4 || in_format == schema::Format_NC4HW4) { + if (out_tensors_[0]->shape().size() == 4) { for (int i = 0; i < in_tensors_.size(); ++i) { cl_int4 temp_cl; auto temp = in_tensors_[i]->shape(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h index 7f24c9f06e..a2d7cfb9d1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h +++ 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h @@ -25,28 +25,24 @@ namespace mindspore::kernel { class ConcatOpenCLKernel : public OpenCLKernel { public: - explicit ConcatOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + ConcatOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~ConcatOpenCLKernel() override{}; + ~ConcatOpenCLKernel() override = default; int Init() override; - int ReSize() override; - int Run() override; + private: int RunAxis0(); - int GetImageSize(size_t idx, std::vector *img_size) override; - int IntegraShapeToXYZ(); - private: cl::Kernel kernel_; std::vector XYZShape; - cl_int4 shape_nhwc; + cl_int4 shape_nhwc{}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index e77f7e7441..3ccc3dc2ae 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -38,8 +38,7 @@ int Conv2dTransposeOpenCLKernel::Init() { MS_LOG(ERROR) << "only support kernel - stride == 2 * pad"; return RET_ERROR; } - std::string kernel_name = "conv2d_transpose"; - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); + std::string kernel_name = "conv2d_transpose_NHWC4"; enable_fp16_ = ocl_runtime_->GetFp16Enable(); #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); @@ -51,20 +50,14 @@ int Conv2dTransposeOpenCLKernel::Init() { ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif PadWeight(); - in_ori_format_ = in_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_ori_format_ = out_tensors_[0]->GetFormat(); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return 
mindspore::lite::RET_OK; } -int Conv2dTransposeOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } - void Conv2dTransposeOpenCLKernel::PadWeight() { ConvParameter *param = reinterpret_cast(op_parameter_); - int ci = in_tensors_[0]->Channel(); - int co = out_tensors_[0]->Channel(); + int ci = in_tensors_[0]->shape()[3]; + int co = out_tensors_[0]->shape()[3]; int kh = param->kernel_h_; int kw = param->kernel_w_; int div_ci = UP_DIV(ci, C4NUM); @@ -147,32 +140,6 @@ void Conv2dTransposeOpenCLKernel::PadWeight() { allocator->UnmapBuffer(bias_); } -int Conv2dTransposeOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - int n = out_tensors_[0]->shape()[0]; - int h = out_tensors_[0]->shape()[1]; - int w = out_tensors_[0]->shape()[2]; - int c = out_tensors_[0]->shape()[3]; - if (op_format_ == schema::Format::Format_NHWC4) { - im_dst_x = w * UP_DIV(c, C4NUM); - im_dst_y = n * h; - } else if (op_format_ == schema::Format::Format_NC4HW4) { - im_dst_x = w; - im_dst_y = n * UP_DIV(c, C4NUM) * h; - } else { - MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); - return mindspore::lite::RET_ERROR; - } - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return mindspore::lite::RET_OK; -} - int Conv2dTransposeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; ConvParameter *param = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h index 3f39e01515..69afe8f77f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h @@ -27,21 +27,20 @@ namespace mindspore::kernel { class Conv2dTransposeOpenCLKernel : public OpenCLKernel { public: - explicit 
Conv2dTransposeOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + Conv2dTransposeOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~Conv2dTransposeOpenCLKernel() override{}; + ~Conv2dTransposeOpenCLKernel() override = default; int Init() override; - int ReSize() override; int Run() override; - void PadWeight(); - int GetImageSize(size_t idx, std::vector *img_size) override; private: + void PadWeight(); + cl::Kernel kernel_; - void *padWeight_; - void *bias_; + void *padWeight_{nullptr}; + void *bias_{nullptr}; bool enable_fp16_{false}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc index 6673d6f211..6a8c552333 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc @@ -29,8 +29,6 @@ using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Conv2D; using mindspore::schema::Format::Format_NC4HW4; -using mindspore::schema::Format::Format_NCHW; -using mindspore::schema::Format::Format_NHWC; using mindspore::schema::Format::Format_NHWC4; namespace mindspore::kernel { @@ -46,14 +44,6 @@ int ConvolutionOpenCLKernel::Init() { auto input_tensor = in_tensors_[0]; auto output_tensor = out_tensors_[0]; - in_ori_format_ = input_tensor->GetFormat(); - out_ori_format_ = output_tensor->GetFormat(); - if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) { - MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!"; - return RET_ERROR; - } - input_tensor->SetFormat(op_format_); - output_tensor->SetFormat(op_format_); batch_size_ = input_tensor->Batch(); CI_ = input_tensor->Channel(); @@ -112,7 +102,7 @@ int ConvolutionOpenCLKernel::Init() { winograd_mem1_ = 
allocator->Malloc(size, {width, height, img_dtype}); } - this->InitBuffer(); + InitBuffer(); MS_LOG(DEBUG) << "Convolution Init Done!"; return RET_OK; @@ -251,30 +241,6 @@ int ConvolutionOpenCLKernel::InitBuffer() { return RET_OK; } -int ConvolutionOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == Format_NHWC4) { - if (OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { - { - im_dst_y = batch_size_ * OH_; - im_dst_x = OW_ * CO_SLICES_; - } - } else { - im_dst_y = OW_; - im_dst_x = batch_size_ * OH_ * CO_SLICES_; - } - } else { - im_dst_y = batch_size_ * CO_SLICES_ * OH_; - im_dst_x = OW_; - } - size_t img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; - img_size->clear(); - img_size->push_back(im_dst_x); - img_size->push_back(im_dst_y); - img_size->push_back(img_dtype); - return RET_OK; -} - int ConvolutionOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h index f5f3e149ab..6191872d64 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h @@ -23,22 +23,22 @@ #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "schema/model_generated.h" #include "nnacl/conv_parameter.h" +#include "schema/ops_generated.h" namespace mindspore::kernel { class ConvolutionOpenCLKernel : public OpenCLKernel { public: - explicit ConvolutionOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + ConvolutionOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~ConvolutionOpenCLKernel() override{}; + ~ConvolutionOpenCLKernel() override = default; int Init() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; + 
int InitBuffer() override; private: - int InitBuffer(); int InitWeight(); int InitBias(); int GenerateWinogradWeight(); @@ -92,7 +92,8 @@ class ConvolutionOpenCLKernel : public OpenCLKernel { return code_id; } - bool use_fp16_ = false; + bool use_fp16_{false}; + const schema::Format op_format_{schema::Format_NHWC4}; int batch_size_{}; int CI_{}; @@ -105,16 +106,16 @@ class ConvolutionOpenCLKernel : public OpenCLKernel { int CO_SLICES_{}; int KH_{}; int KW_{}; - void *packed_weight_ = nullptr; - void *packed_bias_ = nullptr; - bool has_bias_ = false; + void *packed_weight_{nullptr}; + void *packed_bias_{nullptr}; + bool has_bias_{false}; - bool use_winograd_ = false; + bool use_winograd_{false}; int TILES_X_{}; int TILES_Y_{}; int TILES_XY_{}; - void *winograd_mem0_ = nullptr; - void *winograd_mem1_ = nullptr; + void *winograd_mem0_{nullptr}; + void *winograd_mem1_{nullptr}; cl::Kernel kernel_4x4to36_; cl::Kernel kernel_conv_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 7df0b90a23..be1ed30091 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -42,26 +42,12 @@ namespace mindspore::kernel { int DepthwiseConv2dOpenCLKernel::Init() { std::string kernel_name = "DepthwiseConv2d"; - auto in_format = op_format_; - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - if (in_format != schema::Format::Format_NHWC4 && in_format != schema::Format::Format_NC4HW4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return mindspore::lite::RET_ERROR; - } - in_tensors_[0]->SetFormat(in_format); - out_tensors_[0]->SetFormat(in_format); if (out_mem_type_ == OpenCLMemType::BUF) { kernel_name += "_BUF"; } else { kernel_name += "_IMG"; } - if (in_format == schema::Format::Format_NC4HW4) { - 
kernel_name += "_NC4HW4"; - } else if (in_format == schema::Format::Format_NHWC4) { - kernel_name += "_NHWC4"; - } + kernel_name += "_NHWC4"; auto parameter = reinterpret_cast(op_parameter_); if (parameter->kernel_h_ == 1) { kernel_name += "_1x1"; @@ -75,7 +61,7 @@ int DepthwiseConv2dOpenCLKernel::Init() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - this->InitBuffer(); + InitBuffer(); MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast(out_mem_type_); return mindspore::lite::RET_OK; } @@ -146,28 +132,6 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { return mindspore::lite::RET_OK; } -int DepthwiseConv2dOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } - -int DepthwiseConv2dOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height(); - } else { - im_dst_y = out_tensors_[0]->Height() * CO4; - im_dst_x = out_tensors_[0]->Width(); - } - size_t img_dtype = CL_FLOAT; - if (ocl_runtime_->GetFp16Enable()) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return mindspore::lite::RET_OK; -} - int DepthwiseConv2dOpenCLKernel::GetGlobalSize(size_t idx, std::vector *global_size) { size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); std::vector global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h index dfeb6f7bdb..5655579ebe 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h +++ 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h @@ -25,27 +25,25 @@ namespace mindspore::kernel { class DepthwiseConv2dOpenCLKernel : public OpenCLKernel { public: - explicit DepthwiseConv2dOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) - : OpenCLKernel(parameter, inputs, outputs), packed_weight_(nullptr), bias_data_(nullptr), kernel_(nullptr) {} + DepthwiseConv2dOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) + : OpenCLKernel(parameter, inputs, outputs) {} - ~DepthwiseConv2dOpenCLKernel() override{}; + ~DepthwiseConv2dOpenCLKernel() override = default; int Init() override; - int ReSize() override; - int Run() override; - int InitBuffer(); + int InitBuffer() override; - int GetImageSize(size_t idx, std::vector *img_size) override; int GetGlobalSize(size_t idx, std::vector *global_size) override; + int GetLocalSize(size_t idx, const std::vector &global_size, std::vector *local_size) override; private: - void *packed_weight_; - void *bias_data_; + void *packed_weight_{nullptr}; + void *bias_data_{nullptr}; cl::Kernel kernel_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc index 89daa60da3..c036ef20e2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc @@ -33,8 +33,7 @@ using mindspore::schema::PrimitiveType_FullConnection; namespace mindspore::kernel { int FullConnectionOpenCLKernel::Init() { - std::string kernel_name = "FullConnection"; - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); + std::string kernel_name = "FullConnection_NHWC4"; auto param = reinterpret_cast(op_parameter_); transposeA = param->a_transpose_; if (transposeA) { @@ -77,16 +76,10 @@ int FullConnectionOpenCLKernel::Init() { #endif 
PadWeight(); - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int FullConnectionOpenCLKernel::ReSize() { return RET_OK; } - void FullConnectionOpenCLKernel::PadWeight() { // ABMCI @ ABCICO = ABMCO auto allocator = ocl_runtime_->GetAllocator(); @@ -177,39 +170,6 @@ void FullConnectionOpenCLKernel::PadWeight() { allocator->UnmapBuffer(bias_); } -int FullConnectionOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - auto out_shape = out_tensors_[0]->shape(); - int n = 1, h = 1, w = 1, c = 1; - if (out_tensors_[0]->shape().size() == 2) { - n = out_shape[0]; - c = out_shape[1]; - } else { - n = out_shape[0]; - h = out_shape[1]; - w = out_shape[2]; - c = out_shape[3]; - } - if (op_format_ == schema::Format_NHWC4) { - im_dst_x = w * UP_DIV(c, C4NUM); - im_dst_y = n * h; - } else if (op_format_ == schema::Format_NC4HW4) { - im_dst_x = w; - im_dst_y = n * UP_DIV(c, C4NUM) * h; - } else { - MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); - return RET_ERROR; - } - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return RET_OK; -} - int FullConnectionOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; // local size should less than MAX_GROUP_SIZE diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h index 98de476ab5..ce6cd4ceee 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h @@ -26,21 +26,20 @@ namespace mindspore::kernel { class FullConnectionOpenCLKernel : public OpenCLKernel { public: - 
explicit FullConnectionOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + FullConnectionOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~FullConnectionOpenCLKernel() override{}; + ~FullConnectionOpenCLKernel() override = default; int Init() override; - int ReSize() override; int Run() override; - void PadWeight(); - int GetImageSize(size_t idx, std::vector *img_size) override; private: + void PadWeight(); + cl::Kernel kernel_; - void *padWeight_; - void *bias_; + void *padWeight_{nullptr}; + void *bias_{nullptr}; bool enable_fp16_{false}; bool transposeA{false}; bool transposeB{true}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc index e2cd2e2b64..42a8b56fda 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc @@ -31,25 +31,10 @@ using mindspore::schema::PrimitiveType_Gather; namespace mindspore::kernel { int GatherOpenCLKernel::Init() { - std::string kernel_name = "gather"; - auto in_format = op_format_; - if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return RET_ERROR; - } - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); + std::string kernel_name = "gather_NHWC4"; #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - if (in_format == schema::Format_NC4HW4) { - kernel_name += "_NC4HW4"; - } else { - kernel_name += "_NHWC4"; - } std::set build_options; std::string source = gather_source; std::string program_name = "gather"; @@ -99,29 +84,6 @@ int GatherOpenCLKernel::InitBuffer() { 
return RET_OK; } -int GatherOpenCLKernel::ReSize() { return RET_OK; } - -int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); - } else { - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; - im_dst_x = out_tensors_[0]->Width(); - } - size_t img_dtype = CL_FLOAT; - auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = std::move(vec); - return RET_OK; -} - int GatherOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h index 1b20cc332b..a20870fd31 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h @@ -25,25 +25,19 @@ namespace mindspore::kernel { class GatherOpenCLKernel : public OpenCLKernel { public: - explicit GatherOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) - : OpenCLKernel(parameter, inputs, outputs), indices_data_(nullptr) {} + GatherOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) + : OpenCLKernel(parameter, inputs, outputs) {} - ~GatherOpenCLKernel() override{}; + ~GatherOpenCLKernel() override = default; int Init() override; - - int ReSize() override; - int Run() override; - - int GetImageSize(size_t idx, std::vector *img_size) override; - - int InitBuffer(); + int InitBuffer() override; private: cl::Kernel kernel_; - int32_t *indices_data_; + int32_t 
*indices_data_{nullptr}; }; } // namespace mindspore::kernel #endif diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index c4b69ed6c2..de2194d311 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -31,8 +31,7 @@ using mindspore::schema::PrimitiveType_MatMul; namespace mindspore::kernel { int MatMulOpenCLKernel::Init() { - std::string kernel_name = "MatMul"; - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); + std::string kernel_name = "MatMul_NHWC4"; auto param = reinterpret_cast(op_parameter_); transposeA = param->a_transpose_; if (transposeA) { @@ -64,16 +63,10 @@ int MatMulOpenCLKernel::Init() { #endif PadWeight(); - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return mindspore::lite::RET_OK; } -int MatMulOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } - void MatMulOpenCLKernel::PadWeight() { // ABMCI @ ABCICO = ABMCO auto allocator = ocl_runtime_->GetAllocator(); @@ -137,39 +130,6 @@ void MatMulOpenCLKernel::PadWeight() { allocator->UnmapBuffer(padWeight_); } -int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - auto out_shape = out_tensors_[0]->shape(); - int n = 1, h = 1, w = 1, c = 1; - if (dims == 2) { - n = out_shape[0]; - c = out_shape[1]; - } else if (dims == 4) { - n = out_shape[0]; - h = out_shape[1]; - w = out_shape[2]; - c = out_shape[3]; - } - if (op_format_ == schema::Format_NHWC4) { - im_dst_x = w * UP_DIV(c, C4NUM); - im_dst_y = n * h; - } else if (op_format_ == schema::Format_NC4HW4) { - im_dst_x = w; - im_dst_y = n * UP_DIV(c, C4NUM) * h; - } else { - MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); - 
return mindspore::lite::RET_ERROR; - } - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return mindspore::lite::RET_OK; -} - int MatMulOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; // local size should less than MAX_GROUP_SIZE diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h index 43c0b339aa..8c41c2fd58 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h @@ -26,27 +26,26 @@ namespace mindspore::kernel { class MatMulOpenCLKernel : public OpenCLKernel { public: - explicit MatMulOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) - : OpenCLKernel(parameter, inputs, outputs), inShape(MAX_DIMS, 1), outShape(MAX_DIMS, 1) {} - ~MatMulOpenCLKernel() override{}; + MatMulOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) + : OpenCLKernel(parameter, inputs, outputs) {} + ~MatMulOpenCLKernel() override = default; int Init() override; - int ReSize() override; int Run() override; - void PadWeight(); - int GetImageSize(size_t idx, std::vector *img_size) override; private: + void PadWeight(); + cl::Kernel kernel_; - void *padWeight_; + void *padWeight_{nullptr}; bool enable_fp16_{false}; bool transposeA{false}; bool transposeB{true}; - int dims; - static constexpr int MAX_DIMS = 4; // max supported matmul dims - std::vector inShape; - std::vector outShape; + int dims{}; + static constexpr int MAX_DIMS{4}; // max supported matmul dims + std::vector inShape{std::vector(MAX_DIMS, 1)}; + std::vector outShape{std::vector(MAX_DIMS, 1)}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc 
index 2150d5d9d3..77d5dfd966 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc @@ -30,10 +30,6 @@ using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; using mindspore::schema::PaddingMode_CONSTANT; using mindspore::schema::PrimitiveType_Pad; -using mindspore::schema::Format::Format_NC4HW4; -using mindspore::schema::Format::Format_NCHW; -using mindspore::schema::Format::Format_NHWC; -using mindspore::schema::Format::Format_NHWC4; namespace mindspore::kernel { @@ -41,9 +37,6 @@ int PadOpenCLKernel::Init() { auto param = reinterpret_cast(op_parameter_); std::set build_options; - if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) { - MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!"; - } if (in_tensors_.empty()) { MS_LOG(ERROR) << "PadOpenCLKernel in_tensors is empty"; return RET_ERROR; @@ -63,10 +56,6 @@ int PadOpenCLKernel::Init() { auto input_tensor = in_tensors_[0]; auto output_tensor = out_tensors_[0]; - in_ori_format_ = input_tensor->GetFormat(); - out_ori_format_ = output_tensor->GetFormat(); - input_tensor->SetFormat(op_format_); - output_tensor->SetFormat(op_format_); CI_ = input_tensor->Channel(); IH_ = input_tensor->Height(); @@ -78,8 +67,8 @@ int PadOpenCLKernel::Init() { CO_SLICES_ = UP_DIV(CO_, C4NUM); const std::string source = pad_source; - const std::string kernel_name = op_format_ == Format_NHWC4 ? 
"Pad_NHWC4" : "Pad_NC4HW4"; const std::string program_name = "Pad"; + const std::string kernel_name = "Pad_NHWC4"; ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); @@ -87,30 +76,6 @@ int PadOpenCLKernel::Init() { return RET_OK; } -int PadOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == Format_NHWC4) { - if (OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { - { - im_dst_x = OW_ * CO_SLICES_; - im_dst_y = OH_; - } - } else { - im_dst_x = OH_ * CO_SLICES_; - im_dst_y = OW_; - } - } else { - im_dst_y = OH_ * CO_SLICES_; - im_dst_x = OW_; - } - size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT; - img_size->clear(); - img_size->push_back(im_dst_x); - img_size->push_back(im_dst_y); - img_size->push_back(img_dtype); - return RET_OK; -} - int PadOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h index eaf2435706..e274689d3b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h @@ -28,16 +28,16 @@ namespace mindspore::kernel { class PadOpenCLKernel : public OpenCLKernel { public: - explicit PadOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + PadOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~PadOpenCLKernel() override{}; + ~PadOpenCLKernel() override = default; int Init() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; private: + cl::Kernel kernel_; int CI_{}; int IH_{}; int IW_{}; @@ -46,7 +46,6 @@ class PadOpenCLKernel : public OpenCLKernel { int OW_{}; int CI_SLICES_{}; int CO_SLICES_{}; - cl::Kernel 
kernel_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index 6d28ddf090..cc071ef086 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -72,7 +72,7 @@ int PoolingOpenCLKernel::Init() { #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); + kernel_name += "_NHWC4"; if (out_mem_type_ == OpenCLMemType::BUF) { MS_LOG(ERROR) << "buffer output not support yet."; return mindspore::lite::RET_ERROR; @@ -83,10 +83,6 @@ int PoolingOpenCLKernel::Init() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return mindspore::lite::RET_OK; @@ -100,36 +96,6 @@ std::vector PoolingOpenCLKernel::InitGlobalSize() const { return global; } -int PoolingOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - int n = out_tensors_[0]->shape()[0]; - int h = out_tensors_[0]->shape()[1]; - int w = out_tensors_[0]->shape()[2]; - int c = out_tensors_[0]->shape()[3]; - if (op_format_ == schema::Format::Format_NHWC4) { - im_dst_x = w * UP_DIV(c, C4NUM); - im_dst_y = n * h; - } else if (op_format_ == schema::Format::Format_NC4HW4) { - im_dst_x = w; - im_dst_y = n * UP_DIV(c, C4NUM) * h; - } else { - MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); - return mindspore::lite::RET_ERROR; - } - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, 
im_dst_y, img_dtype}; - *img_size = vec; - return mindspore::lite::RET_OK; -} - -int PoolingOpenCLKernel::InitBuffer() { return mindspore::lite::RET_OK; } - -int PoolingOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } - int PoolingOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h index 3c472f1729..dbc735a402 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h @@ -26,18 +26,13 @@ namespace mindspore::kernel { class PoolingOpenCLKernel : public OpenCLKernel { public: - explicit PoolingOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) - : OpenCLKernel(parameter, inputs, outputs) { - parameter_ = reinterpret_cast(parameter); - } - ~PoolingOpenCLKernel() override{}; + PoolingOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) + : OpenCLKernel(parameter, inputs, outputs), parameter_(reinterpret_cast(parameter)) {} + ~PoolingOpenCLKernel() override = default; int Init() override; - int ReSize() override; int Run() override; - int InitBuffer(); - int GetImageSize(size_t idx, std::vector *img_size) override; private: std::vector InitGlobalSize() const; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index 552273bfcb..6a3f22a77d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -33,7 +33,7 @@ using mindspore::schema::PrimitiveType_PReLU; namespace mindspore::kernel { -void PReluOpenCLKernel::InitBuffer() { +int PReluOpenCLKernel::InitBuffer() { auto allocator = ocl_runtime_->GetAllocator(); auto weight_tensor = in_tensors_[1]; if (weight_is_scalar) { @@ -71,6 +71,7 @@ void 
PReluOpenCLKernel::InitBuffer() { } allocator->UnmapBuffer(weight_vector_); } + return RET_OK; } int PReluOpenCLKernel::Init() { @@ -84,10 +85,6 @@ int PReluOpenCLKernel::Init() { C_ = input_tensor->Channel(); H_ = input_tensor->Height(); W_ = input_tensor->Width(); - if (input_tensor->GetFormat() != schema::Format_NC4HW4 && input_tensor->GetFormat() != schema::Format_NHWC4) { - MS_LOG(ERROR) << "PRelu only support Format_NC4HW4 and Format_NHWC4"; - return mindspore::lite::RET_ERROR; - } if (batch_size_ != 1) { MS_LOG(ERROR) << "Init PRelu kernel failed: Unsupported multi-batch."; return RET_ERROR; @@ -104,12 +101,7 @@ int PReluOpenCLKernel::Init() { MS_LOG(ERROR) << "PRelu weight must be float32 or float16"; return RET_ERROR; } - enable_fp16_ = ocl_runtime_->GetFp16Enable(); - in_ori_format_ = input_tensor->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - input_tensor->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); std::set build_options; std::string source = prelu_source; @@ -137,11 +129,7 @@ int PReluOpenCLKernel::Run() { ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_); } ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape); - if (op_format_ == schema::Format_NHWC4) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); - } else { // Format_NC4HW4 = 100 - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 100); - } + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); std::vector local = {4, 4, 1}; std::vector global = {static_cast(H_), static_cast(W_), static_cast(CO_SLICES_)}; @@ -153,31 +141,6 @@ int PReluOpenCLKernel::Run() { return mindspore::lite::RET_OK; } -int PReluOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - auto CO_SLICES_ = UP_DIV(C_, C4NUM); - if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) { - if (W_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { - { - im_dst_y = batch_size_ * H_; - im_dst_x = W_ * CO_SLICES_; - } - } else { - im_dst_y = W_; - im_dst_x = 
batch_size_ * H_ * CO_SLICES_; - } - } else { - im_dst_y = batch_size_ * CO_SLICES_ * H_; - im_dst_x = W_; - } - size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; - img_size->clear(); - img_size->push_back(im_dst_x); - img_size->push_back(im_dst_y); - img_size->push_back(img_dtype); - return mindspore::lite::RET_OK; -} - kernel::LiteKernel *OpenCLPReluKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h index c4ea418ffb..f54b3b5f5e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h @@ -27,15 +27,14 @@ namespace mindspore::kernel { class PReluOpenCLKernel : public OpenCLKernel { public: - explicit PReluOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + PReluOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~PReluOpenCLKernel() override{}; + ~PReluOpenCLKernel() override = default; int Init() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - void InitBuffer(); + int InitBuffer() override; private: cl::Kernel kernel_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc index 7f2054c90f..b22e4e269d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc @@ -57,7 +57,7 @@ int ReduceOpenCLKernel::Init() { return RET_PARAM_INVALID; } std::string kernel_name = reduce_type2str.at(reduce_param->mode_); - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); + kernel_name += "_NHWC4"; enable_fp16_ = 
ocl_runtime_->GetFp16Enable(); if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) { @@ -74,10 +74,6 @@ int ReduceOpenCLKernel::Init() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return mindspore::lite::RET_OK; } @@ -97,31 +93,6 @@ void ReduceOpenCLKernel::InitNHWCShape() { nhwc_shape_ = {n, h, w, c}; } -int ReduceOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } - -int ReduceOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - - if (op_format_ == schema::Format_NHWC4) { - im_dst_x = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM); - im_dst_y = nhwc_shape_[0] * nhwc_shape_[1]; - } else if (op_format_ == schema::Format_NC4HW4) { - im_dst_x = nhwc_shape_[2]; - im_dst_y = nhwc_shape_[0] * UP_DIV(nhwc_shape_[3], C4NUM) * nhwc_shape_[1]; - } else { - MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); - return mindspore::lite::RET_ERROR; - } - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return mindspore::lite::RET_OK; -} - int ReduceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; std::vector shapex = in_tensors_[0]->shape(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h index 85fc42ea82..7e4780f57a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h @@ -26,15 +26,13 @@ namespace mindspore::kernel { class ReduceOpenCLKernel : public OpenCLKernel { public: - 
explicit ReduceOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + ReduceOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~ReduceOpenCLKernel() override{}; + ~ReduceOpenCLKernel() override = default; int Init() override; - int ReSize() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; void InitNHWCShape(); private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index d4e71a2c1d..9da40a502d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -31,25 +31,11 @@ using mindspore::schema::PrimitiveType_Squeeze; namespace mindspore::kernel { int ReshapeOpenCLKernel::Init() { - std::string kernel_name = "reshape"; - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); - enable_fp16_ = ocl_runtime_->GetFp16Enable(); + std::string kernel_name = "reshape_NHWC4"; if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) { MS_LOG(ERROR) << "Reshape output size should in 2,4"; return RET_ERROR; } - if (in_tensors_[0]->shape().size() == 2) { - inShape = {in_tensors_[0]->shape()[0], 1, 1, in_tensors_[0]->shape()[1]}; - } else { - inShape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], - in_tensors_[0]->shape()[3]}; - } - if (out_tensors_[0]->shape().size() == 2) { - outShape = {out_tensors_[0]->shape()[0], 1, 1, out_tensors_[0]->shape()[1]}; - } else { - outShape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2], - out_tensors_[0]->shape()[3]}; - } #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else @@ -59,55 +45,20 @@ int ReshapeOpenCLKernel::Init() { ocl_runtime_->LoadSource(program_name, 
source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int ReshapeOpenCLKernel::ReSize() { return RET_OK; } - -int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - int n = outShape[0]; - int h = outShape[1]; - int w = outShape[2]; - int c = outShape[3]; - if (img_size_.size() == OpenCLImageSizeIndex::IDX_NUM) { - *img_size = img_size_; - return RET_OK; - } - if (op_format_ == schema::Format::Format_NHWC4) { - im_dst_x = w * UP_DIV(c, C4NUM); - im_dst_y = n * h; - } else if (op_format_ == schema::Format::Format_NC4HW4) { - im_dst_x = w; - im_dst_y = n * UP_DIV(c, C4NUM) * h; - } else { - MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); - return RET_ERROR; - } - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - img_size_ = vec; - return RET_OK; -} - int ReshapeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; + auto in = Image2DInfo(in_tensors_.front()); + auto out = Image2DInfo(out_tensors_.front()); std::vector local = {}; - std::vector global{img_size_[0], img_size_[1]}; - cl_int4 src_size = {inShape[3], inShape[2], inShape[1], inShape[0]}; - cl_int4 dst_size = {static_cast(img_size_[0]), static_cast(img_size_[1]), outShape[3], - outShape[3] * outShape[2]}; + std::vector global{out.width, out.height}; + cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)}; + cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)}; + int arg_idx = 0; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); 
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h index 4ee7d789d3..31c8e95d07 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h @@ -25,21 +25,16 @@ namespace mindspore::kernel { class ReshapeOpenCLKernel : public OpenCLKernel { public: - explicit ReshapeOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + ReshapeOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~ReshapeOpenCLKernel() override{}; + ~ReshapeOpenCLKernel() override = default; int Init() override; - int ReSize() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; private: cl::Kernel kernel_; - bool enable_fp16_{false}; - std::vector inShape; - std::vector outShape; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc index 5495be6907..ef862b4320 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc @@ -54,7 +54,7 @@ int ResizeOpenCLKernel::Init() { MS_LOG(ERROR) << "unsupported resize method:" << resize_param->method_; return RET_PARAM_INVALID; } - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); + kernel_name += "_NHWC4"; #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else @@ -64,39 +64,10 @@ int ResizeOpenCLKernel::Init() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); 
- in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int ResizeOpenCLKernel::ReSize() { return RET_OK; } - -int ResizeOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - auto nhwc_shape_ = out_tensors_[0]->shape(); - if (op_format_ == schema::Format_NHWC4) { - im_dst_x = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM); - im_dst_y = nhwc_shape_[0] * nhwc_shape_[1]; - } else if (op_format_ == schema::Format_NC4HW4) { - im_dst_x = nhwc_shape_[2]; - im_dst_y = nhwc_shape_[0] * UP_DIV(nhwc_shape_[3], C4NUM) * nhwc_shape_[1]; - } else { - MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); - return RET_ERROR; - } - size_t img_dtype = CL_FLOAT; - if (ocl_runtime_->GetFp16Enable()) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return RET_OK; -} - float ResizeOpenCLKernel::getResizeScaleFactor(int input_size, int output_size) { return input_size > 1 && output_size > 1 && alignCorner ? 
static_cast(input_size - 1) / static_cast(output_size - 1) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h index c1b17b9016..3c59fa4d65 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h @@ -26,21 +26,20 @@ namespace mindspore::kernel { class ResizeOpenCLKernel : public OpenCLKernel { public: - explicit ResizeOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + ResizeOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~ResizeOpenCLKernel() override{}; + ~ResizeOpenCLKernel() override = default; int Init() override; - int ReSize() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - float getResizeScaleFactor(int input_size, int output_size); private: + float getResizeScaleFactor(int input_size, int output_size); + cl::Kernel kernel_; - bool alignCorner; - bool preserveAspectRatio; + bool alignCorner{false}; + bool preserveAspectRatio{false}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index e0dcd61754..85fae38346 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -46,79 +46,19 @@ ScaleOpenCLKernel::~ScaleOpenCLKernel() { } } -std::vector ScaleOpenCLKernel::InitGlobalSize() const { - const size_t global_x = out_tensors_[0]->Width(); - const size_t global_y = out_tensors_[0]->Height(); - const size_t global_z = UP_ROUND_DIV(out_tensors_[0]->Channel(), C4NUM); - std::vector global = {global_x, global_y, global_z}; - return global; -} - void ScaleOpenCLKernel::Image2dGetWorkGroupSize() { local_size_ = {16, 16}; if 
(out_tensors_[0]->shape().size() == 2) { size_t H = out_tensors_[0]->shape()[0]; size_t W = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); global_size_ = {W, H}; - return; - } - if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { - size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t W = out_tensors_[0]->Width(); - global_size_ = {W, H}; - } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { + } else { size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); size_t W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); global_size_ = {W, H}; - } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) { - size_t H = out_tensors_[0]->Batch(); - size_t W = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - global_size_ = {W, H}; - } else { - MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat(); } } -void ScaleOpenCLKernel::BufferGetWorkGroupSize() { - uint32_t element_num = out_tensors_[0]->ElementsC4Num(); - global_size_ = {element_num}; -} - -int ScaleOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - if (out_tensors_[0]->shape().size() == 2) { - im_dst_x = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); - im_dst_y = out_tensors_[0]->shape()[0]; - } else { - if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { - im_dst_x = out_tensors_[0]->Width(); - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); - } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); - } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) { - im_dst_y = out_tensors_[0]->Batch(); - im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - } else { - MS_LOG(ERROR) << "Unsupport data format " << 
out_tensors_[0]->GetFormat(); - return RET_ERROR; - } - } - - size_t img_dtype = CL_FLOAT; - if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { - img_dtype = CL_HALF_FLOAT; - } else if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { - img_dtype = CL_FLOAT; - } else { - MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[0]->data_type(); - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return RET_OK; -} - int ScaleOpenCLKernel::InitBuffer() { if (!element_flag_) { return RET_OK; @@ -127,6 +67,7 @@ int ScaleOpenCLKernel::InitBuffer() { auto allocator = ocl_runtime_->GetAllocator(); std::vector img_size; GetImageSize(0, &img_size); + img_size[2] = in_tensors_[1]->data_type() == kNumberTypeFloat16 ? CL_HALF_FLOAT : CL_FLOAT; if (scale_C_flag_) { img_size[1] = 1; img_size[0] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM); @@ -147,57 +88,7 @@ int ScaleOpenCLKernel::InitBuffer() { << in_tensors_[0]->data_type(); return RET_ERROR; } - } else if (in_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { - if (in_tensors_[1]->GetFormat() == schema::Format_NHWC) { - if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { - float *scale = new (std::nothrow) float[pack_weight_size]; - if (scale == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed!"; - return RET_ERROR; - } - float *offset = new (std::nothrow) float[pack_weight_size]; - if (offset == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed!"; - delete[] scale; - return RET_ERROR; - } - std::function to_dtype = [](float x) -> float { return x; }; - PackNHWCToNC4HW4(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); - PackNHWCToNC4HW4(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); - scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); - offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); - delete[] scale; - delete[] offset; - } else if 
(in_tensors_[0]->data_type() == kNumberTypeFloat16) { - float16_t *scale = new (std::nothrow) float16_t[pack_weight_size]; - if (scale == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed!"; - return RET_ERROR; - } - float16_t *offset = new (std::nothrow) float16_t[pack_weight_size]; - if (offset == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed!"; - delete[] scale; - return RET_ERROR; - } - std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; - PackNHWCToNC4HW4(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); - PackNHWCToNC4HW4(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); - scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); - offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); - delete[] scale; - delete[] offset; - } else { - MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " - << in_tensors_[0]->data_type(); - return RET_ERROR; - } - } else { - MS_LOG(ERROR) << "Unsupport format transpose from " << in_tensors_[1]->GetFormat() << "to " - << in_tensors_[0]->GetFormat(); - return RET_ERROR; - } - } else if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) { + } else if (in_tensors_[0]->GetFormat() == schema::Format_NHWC) { if (in_tensors_[1]->GetFormat() == schema::Format_NHWC) { if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { float *scale = new (std::nothrow) float[pack_weight_size]; @@ -298,18 +189,6 @@ int ScaleOpenCLKernel::Init() { return error_code; } - auto format = op_format_; - if (out_tensors_[0]->shape().size() == 2) { - format = schema::Format_NC4; - } - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(format); - if (element_flag_ && in_tensors_[1]->category() != lite::Tensor::Category::CONST) { - in_tensors_[1]->SetFormat(format); - in_tensors_[2]->SetFormat(format); - 
} - out_tensors_[0]->SetFormat(format); Image2dGetWorkGroupSize(); InitBuffer(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h index 8f8c92adbc..1edadcf219 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h @@ -25,20 +25,17 @@ namespace mindspore::kernel { class ScaleOpenCLKernel : public OpenCLKernel { public: - explicit ScaleOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const lite::InnerContext *ctx) + ScaleOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) : OpenCLKernel(parameter, inputs, outputs) {} ~ScaleOpenCLKernel() override; int Init() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; + int InitBuffer() override; private: - std::vector InitGlobalSize() const; void Image2dGetWorkGroupSize(); - void BufferGetWorkGroupSize(); - int InitBuffer(); cl::Kernel kernel_; bool element_flag_{true}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc index a50cda01f7..c8d6b009dc 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc @@ -30,55 +30,17 @@ using mindspore::schema::PrimitiveType_Slice; namespace mindspore::kernel { -int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height(); - } else { - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; - im_dst_x = 
out_tensors_[0]->Width(); - } - size_t img_dtype = CL_FLOAT; - auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return RET_OK; -} - int SliceOpenCLKernel::Init() { - std::string kernel_name = "slice"; - auto in_format = op_format_; - if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return RET_ERROR; - } - in_ori_format_ = in_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_ori_format_ = out_tensors_[0]->GetFormat(); - out_tensors_[0]->SetFormat(op_format_); - if (in_format == schema::Format_NC4HW4) { - kernel_name += "_NC4HW4"; - } else if (in_format == schema::Format_NHWC4) { - kernel_name += "_NHWC4"; - } std::set build_options; std::string source = slice_source; std::string program_name = "slice"; + std::string kernel_name = "slice_NHWC4"; ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int SliceOpenCLKernel::ReSize() { return RET_OK; } - void SlcieGetWorkGroup(const std::vector &global, std::vector *local, int max_size) { const int max_divider = 8; const int max_x = 4, max_y = 8; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h index dc3d4771b3..44fb3871d6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h @@ -25,20 +25,16 @@ namespace mindspore::kernel { class SliceOpenCLKernel : public OpenCLKernel { public: - explicit SliceOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + SliceOpenCLKernel(OpParameter *parameter, const std::vector 
&inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~SliceOpenCLKernel() override{}; + ~SliceOpenCLKernel() override = default; int Init() override; - int ReSize() override; - int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - private: cl::Kernel kernel_; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index 469e8db72a..976b4af09a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -77,30 +77,6 @@ int SoftmaxOpenCLKernel::SetWorkGroupSize1x1() { return lite::RET_OK; } -int SoftmaxOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x, im_dst_y; - auto out_shape = out_tensors_[0]->shape(); - int n = nhwc_shape_[0], h = nhwc_shape_[1], w = nhwc_shape_[2], c = nhwc_shape_[3]; - if (op_format_ == schema::Format_NHWC4) { - im_dst_x = w * UP_DIV(c, C4NUM); - im_dst_y = n * h; - } else if (op_format_ == schema::Format_NC4HW4) { - im_dst_x = w; - im_dst_y = n * UP_DIV(c, C4NUM) * h; - } else { - MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); - return mindspore::lite::RET_ERROR; - } - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return mindspore::lite::RET_OK; -} - int SoftmaxOpenCLKernel::Init() { std::string kernel_name = "SoftMax"; std::string program_name = "SoftMax"; @@ -131,7 +107,7 @@ int SoftmaxOpenCLKernel::Init() { onexone_flag_ = false; kernel_name += "Axis" + std::to_string(axis_); } - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); + kernel_name += "_NHWC4"; #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else @@ -139,10 +115,6 @@ int SoftmaxOpenCLKernel::Init() { 
ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h index 136fdd89a9..516330003a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h @@ -26,26 +26,22 @@ namespace mindspore::kernel { class SoftmaxOpenCLKernel : public OpenCLKernel { public: - explicit SoftmaxOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) - : OpenCLKernel(parameter, inputs, outputs) { - parameter_ = reinterpret_cast(parameter); - } + SoftmaxOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) + : OpenCLKernel(parameter, inputs, outputs), parameter_(reinterpret_cast(parameter)) {} - ~SoftmaxOpenCLKernel() override{}; + ~SoftmaxOpenCLKernel() override = default; int Init() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; + private: int InitGlobalSize(); int SetWorkGroupSize1x1(); int SetWorkGroupSize(); std::vector GetMaskForLastChannel(int channels); - private: cl::Kernel kernel_; SoftmaxParameter *parameter_; - bool onexone_flag_{false}; std::vector local_size_; std::vector global_size_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc index 4661e8d292..cc3691255d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc @@ -32,18 
+32,12 @@ using mindspore::schema::PrimitiveType_SpaceToBatchND; namespace mindspore::kernel { int SpaceToBatchNDOpenCLKernel::Init() { - std::string kernel_name = "space_to_batch_nd"; - auto in_format = op_format_; + std::string kernel_name = "space_to_batch_nd_NHWC4"; if (in_tensors_[0]->shape().size() != 4 && out_tensors_[0]->shape().size() != 4) { MS_LOG(ERROR) << "input/output shape size must be 4, actual: " << in_tensors_[0]->shape().size() << ", " << out_tensors_[0]->shape().size(); return RET_ERROR; } - if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { - MS_LOG(ERROR) << "input format(" << in_format << ") " - << "format not support!"; - return RET_ERROR; - } auto *param = reinterpret_cast(this->op_parameter_); param->need_paddings_ = (param->paddings_[0] | param->paddings_[1] | param->paddings_[2] | param->paddings_[3]); param->padded_in_shape_[kNHWC_N] = in_tensors_[0]->shape().at(kNHWC_N); @@ -60,18 +54,10 @@ int SpaceToBatchNDOpenCLKernel::Init() { return RET_ERROR; } - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - if (in_format == schema::Format_NC4HW4) { - kernel_name += "_NC4HW4"; - } else { - kernel_name += "_NHWC4"; - } + std::set build_options; std::string source = space_to_batch_nd_source; std::string program_name = "space_to_batch_nd"; @@ -81,28 +67,7 @@ int SpaceToBatchNDOpenCLKernel::Init() { MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int SpaceToBatchNDOpenCLKernel::InitBuffer() { return RET_OK; } -int SpaceToBatchNDOpenCLKernel::ReSize() { return RET_OK; } -int SpaceToBatchNDOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t im_dst_x, im_dst_y; - if (in_tensors_[0]->GetFormat() == 
schema::Format::Format_NHWC4) { - im_dst_x = out_tensors_[0]->Width() * CO4; - im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); - } else { - im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; - im_dst_x = out_tensors_[0]->Width(); - } - size_t img_dtype = CL_FLOAT; - auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = std::move(vec); - return RET_OK; -} + int SpaceToBatchNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; auto param = reinterpret_cast(this->op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h index c0ae18c1b1..06a4ebb520 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h @@ -25,22 +25,16 @@ namespace mindspore::kernel { class SpaceToBatchNDOpenCLKernel : public OpenCLKernel { public: - explicit SpaceToBatchNDOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + SpaceToBatchNDOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~SpaceToBatchNDOpenCLKernel() override{}; + ~SpaceToBatchNDOpenCLKernel() override = default; int Init() override; - int ReSize() override; - int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; - - int InitBuffer(); - private: cl::Kernel kernel_; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index f64d942a27..369ae8a967 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -84,14 
+84,6 @@ int ToFormatOpenCLKernel::InitNHWC() { return RET_OK; } -int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t img_height = N_ * H_; - size_t img_width = W_ * UP_DIV(C_, C4NUM); - size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT; - *img_size = {img_width, img_height, img_dtype}; - return RET_OK; -} - int ToFormatOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; std::vector global = {N_ * H_, W_, UP_DIV(C_, C4NUM)}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h index bb7b6be945..d1f39f457f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h @@ -25,15 +25,13 @@ namespace mindspore::kernel { class ToFormatOpenCLKernel : public OpenCLKernel { public: - explicit ToFormatOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + ToFormatOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~ToFormatOpenCLKernel() override{}; + ~ToFormatOpenCLKernel() override = default; int Init() override; - int ReSize() override { return mindspore::lite::RET_OK; }; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; private: int InitNHWC(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index b2f9c4f71c..e744da8e13 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -57,7 +57,8 @@ int TransposeOpenCLKernel::Init() { // just for input kernel_name += "_oversize"; } - kernel_name += "_" + std::string(EnumNameFormat(op_format_)); + kernel_name += "_NHWC4"; + #ifdef PROGRAM_WITH_IL kernel_ = 
ocl_runtime_->GetKernelFromBinary(kernel_name); #else @@ -67,37 +68,11 @@ int TransposeOpenCLKernel::Init() { ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif - in_ori_format_ = in_tensors_[0]->GetFormat(); - out_ori_format_ = out_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(op_format_); - out_tensors_[0]->SetFormat(op_format_); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return mindspore::lite::RET_OK; } -int TransposeOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } - -int TransposeOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { - size_t im_dst_x = 1, im_dst_y = 1; - auto out_shape = out_tensors_[0]->shape(); - if (op_format_ == schema::Format_NHWC4) { - im_dst_x = out_shape[2] * UP_DIV(out_shape[3], C4NUM); // W * C4 - im_dst_y = out_shape[0] * out_shape[1]; // N * H - } else if (op_format_ == schema::Format_NC4HW4) { - im_dst_x = out_shape[2]; // W - im_dst_y = out_shape[0] * UP_DIV(out_shape[3], C4NUM) * out_shape[1]; // N * C4 * H - } - size_t img_dtype = CL_FLOAT; - if (enable_fp16_) { - img_dtype = CL_HALF_FLOAT; - } - img_size->clear(); - std::vector vec{im_dst_x, im_dst_y, img_dtype}; - *img_size = vec; - return mindspore::lite::RET_OK; -} - int TransposeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; std::vector shapex = out_tensors_[0]->shape(); @@ -108,12 +83,11 @@ int TransposeOpenCLKernel::Run() { size_t c4 = UP_DIV(c, 4); std::vector local = {}; std::vector global; - if (type == TransposeType::AXIS0312) { + if (type == TransposeType::AXIS0312) { // NHWC -> NCHW global = {UP_DIV(h, C4NUM), w, c4}; - } else if (type == TransposeType::AXIS0231) { + } else if (type == TransposeType::AXIS0231) { // NCHW -> NHWC global = {h, UP_DIV(w, C4NUM), c4}; } - cl_int4 shape = {static_cast(n), static_cast(h), static_cast(w), static_cast(c)}; int arg_idx = 0; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 
in_tensors_[0]->data_c()); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h index e5cc2c4238..e4dcc9c7b6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h @@ -29,15 +29,13 @@ enum class TransposeType { AXIS0312, AXIS0231 }; class TransposeOpenCLKernel : public OpenCLKernel { public: - explicit TransposeOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + TransposeOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~TransposeOpenCLKernel() override{}; + ~TransposeOpenCLKernel() override = default; int Init() override; - int ReSize() override; int Run() override; - int GetImageSize(size_t idx, std::vector *img_size) override; private: cl::Kernel kernel_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index d5defc0a7e..796eba99c2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -22,52 +22,120 @@ #include "include/errorcode.h" #include "src/runtime/opencl/opencl_runtime.h" +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + namespace mindspore::kernel { enum class OpenCLMemType { BUF, IMG }; -enum OpenCLImageSizeIndex { IDX_X = 0, IDX_Y, IDX_DTYPE, IDX_NUM }; struct OpenCLToFormatParameter { - OpParameter op_parameter; + OpParameter op_parameter{}; schema::Format src_format{schema::Format::Format_NHWC}; schema::Format dst_format{schema::Format::Format_NHWC4}; OpenCLMemType out_mem_type{OpenCLMemType::IMG}; }; +struct Image2DInfo { + explicit Image2DInfo(const lite::Tensor *tensor) { + if (tensor) { + auto shape = tensor->shape(); + if (shape.size() == 1) { + N = shape[0]; + } else if 
(shape.size() == 2) { + N = shape[0]; + C = shape[1]; + } else if (shape.size() == 3) { + N = shape[0]; + W = shape[1]; + C = shape[2]; + } else if (shape.size() == 4) { + N = shape[0]; + H = shape[1]; + W = shape[2]; + C = shape[3]; + } else if (shape.size() >= 5) { + MS_LOG(ERROR) << "GPU dont't support Tensor with dim=" << shape.size(); + } + FLT_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half) : sizeof(cl_float); + } else { + FLT_size = sizeof(cl_float); + } + + FLT4_size = FLT_size * 4; + Slice = UP_DIV(C, C4NUM); + if (W * Slice <= MAX_IMAGE2D_SIZE) { + height = N * H; + width = W * Slice; + } else { + height = W; + width = N * H * Slice; + } + + auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); + int alignment = runtime_wrapper.GetInstance()->GetImagePitchAlignment(); + row_pitch = (width + alignment - 1) / alignment * alignment * FLT4_size; + + ElementsNum = N * H * W * C; + ElementsC4Num = N * H * W * Slice * C4NUM; + OriginSize = ElementsNum * FLT_size; + Image2DSize = height * width * FLT4_size; + } + + size_t N{1}; + size_t H{1}; + size_t W{1}; + size_t C{1}; + size_t Slice{}; + size_t width{}; + size_t height{}; + size_t FLT_size{}; + size_t FLT4_size{}; + size_t row_pitch{}; + size_t ElementsNum{}; + size_t ElementsC4Num{}; + size_t OriginSize{}; + size_t Image2DSize{}; +}; + class OpenCLKernel : public LiteKernel { public: - explicit OpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + OpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) { ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); } + ~OpenCLKernel() override = default; - ~OpenCLKernel() {} + int Init() override { return RET_ERROR; } + int PreProcess() override { return RET_ERROR; } + int ReSize() override { return RET_ERROR; } + int Run() override { return RET_ERROR; } - virtual int Init() { return 
mindspore::lite::RET_ERROR; } - virtual int PreProcess() { return mindspore::lite::RET_ERROR; } - virtual int InferShape() { return mindspore::lite::RET_ERROR; } - virtual int ReSize() { return mindspore::lite::RET_ERROR; } - virtual int Run() { return mindspore::lite::RET_ERROR; } - virtual int GetImageSize(size_t idx, std::vector *img_size) { return mindspore::lite::RET_ERROR; } - virtual int GetGlobalSize(size_t idx, std::vector *global_size) { return mindspore::lite::RET_ERROR; } + virtual int InitBuffer() { return RET_OK; } + virtual int GetGlobalSize(size_t idx, std::vector *global_size) { return RET_ERROR; } virtual int GetLocalSize(size_t idx, const std::vector &global_size, std::vector *local_size) { - return mindspore::lite::RET_ERROR; + return RET_ERROR; + } + int GetImageSize(size_t idx, std::vector *img_size) { + if (idx >= out_tensors_.size()) { + return RET_ERROR; + } + auto img_info = Image2DInfo(out_tensors_[idx]); + size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT; + *img_size = {img_info.width, img_info.height, img_dtype}; + return RET_OK; } + OpenCLMemType GetMemType() { return out_mem_type_; } void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; } - void SetFormatType(schema::Format format_type) { op_format_ = format_type; } - schema::Format GetInOriFormat() { return in_ori_format_; } - schema::Format GetOutOriFormat() { return out_ori_format_; } protected: + lite::opencl::OpenCLRuntime *ocl_runtime_; OpenCLMemType out_mem_type_{OpenCLMemType::IMG}; - schema::Format in_ori_format_{schema::Format::Format_NHWC}; - schema::Format out_ori_format_{schema::Format::Format_NHWC4}; - schema::Format op_format_{schema::Format::Format_NHWC4}; + + private: lite::opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_; - lite::opencl::OpenCLRuntime *ocl_runtime_; - std::vector img_size_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc 
b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index 34b726d8d7..5c14f06ee6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -27,7 +27,7 @@ using mindspore::lite::RET_OK; SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); } int SubGraphOpenCLKernel::GenToFormatOp(const std::vector &in_tensors, - const std::vector> in_kernels, + const std::vector> &in_kernels, std::vector *out_tensors, std::vector *out_parameters, std::vector *out_convert_ops, OpenCLMemType mem_type) { @@ -66,7 +66,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector &in_te } auto dst_format = (mem_type == OpenCLMemType::IMG) ? schema::Format::Format_NHWC4 : schema::Format::Format_NHWC; auto src_format = (mem_type == OpenCLMemType::IMG) ? schema::Format::Format_NHWC : schema::Format::Format_NHWC4; - lite::Tensor *new_tensor = new (std::nothrow) lite::Tensor(); + auto *new_tensor = new (std::nothrow) lite::Tensor(); MS_ASSERT(new_tensor); if (new_tensor == nullptr) { MS_LOG(ERROR) << "SubGraphOpenCLKernel new tensor failed!"; @@ -87,8 +87,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector &in_te desc.data_type = kNumberTypeFloat16; new_tensor->set_data_type(kNumberTypeFloat16); } - OpenCLToFormatParameter *parameter = - static_cast(malloc(sizeof(OpenCLToFormatParameter))); + auto *parameter = static_cast(malloc(sizeof(OpenCLToFormatParameter))); MS_ASSERT(parameter); if (parameter == nullptr) { MS_LOG(ERROR) << "SubGraphOpenCLKernel new parameter failed!"; @@ -196,11 +195,12 @@ int SubGraphOpenCLKernel::UpdateTensorDataType() { } return RET_OK; } + int SubGraphOpenCLKernel::MallocTensorWithReuse() { kernel::LiteKernelUtil::InitTensorRefCount(nodes_); for (auto *kernel : nodes_) { MS_ASSERT(nullptr != kernel); - kernel::OpenCLKernel *op_kernel = reinterpret_cast(kernel); + auto *op_kernel = reinterpret_cast(kernel); auto outputs = 
kernel->out_tensors(); for (auto i = 0; i < outputs.size(); ++i) { auto *output = outputs.at(i); @@ -258,10 +258,10 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector kvec; for (size_t j = 0; j < in_kernels.size(); ++j) { - if (ksets[j].count(in_tensors[i])) { + if (ksets[j].count(in_tensor)) { kvec.emplace_back(in_kernels[j]); } } @@ -272,21 +272,15 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector inputs, const std::vector outputs, - const std::vector inKernels, - const std::vector outKernels, - const std::vector nodes, const lite::InnerContext *ctx = nullptr) + SubGraphOpenCLKernel(const std::vector &inputs, const std::vector &outputs, + const std::vector &inKernels, + const std::vector &outKernels, + const std::vector &nodes, const lite::InnerContext *ctx = nullptr) : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) { ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); subgraph_type_ = kGpuSubGraph; @@ -50,21 +50,19 @@ class SubGraphOpenCLKernel : public SubGraphKernel { int ReSize() override; int Run() override; int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); }; - int UnInit(); - protected: + private: + int UnInit(); int UpdateTensorDataType(); int MallocTensorWithReuse(); int GenToFormatOp(const std::vector &in_tensors, - const std::vector> in_kernels, + const std::vector> &in_kernels, std::vector *out_tensors, std::vector *out_parameters, std::vector *out_convert_ops, OpenCLMemType mem_type); int GetKernelFromToTensor(const std::vector &in_tensors, const std::vector &in_kernels, std::vector> *out_kernels, bool is_from); - - private: - lite::opencl::OpenCLAllocator *allocator_; + lite::opencl::OpenCLAllocator *allocator_{nullptr}; std::vector in_convert_tensors_; std::vector out_convert_tensors_; std::vector in_parameters_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.cc b/mindspore/lite/src/runtime/kernel/opencl/utils.cc index 
fc38b78d09..641d6b7031 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.cc @@ -20,6 +20,8 @@ #include #include "src/kernel_registry.h" #include "src/runtime/opencl/opencl_runtime.h" +#include "src/runtime/kernel/opencl/opencl_kernel.h" +#include "src/common/file_utils.h" using mindspore::lite::KernelRegistrar; @@ -224,57 +226,77 @@ std::string CLErrorCode(cl_int error_code) { } } -void Write2File(void *mem, const std::string &file_name, int size) { - std::fstream os; - os.open(file_name, std::ios::out | std::ios::binary); - os.write(static_cast(mem), size); - os.close(); +int WriteToBin(const std::string &file_path, void *data, size_t size) { + std::ofstream out_file; + + out_file.open(file_path.c_str(), std::ios::binary); + if (!out_file.good()) { + MS_LOG(ERROR) << "file is bad"; + return -1; + } + + if (!out_file.is_open()) { + MS_LOG(ERROR) << "file open failed"; + return -1; + } + out_file.write(reinterpret_cast(data), size); + return 0; } -void PrintTensor(lite::Tensor *tensor, int num, const std::string &out_file) { +void PrintTensor(const lite::Tensor *tensor, OpenCLMemType mem_type, int n, const std::string &out_file) { if (tensor->data_c() == nullptr) { return; } + + Image2DInfo img_info(tensor); + auto size = mem_type == OpenCLMemType::BUF ? 
img_info.OriginSize : img_info.Image2DSize; + std::vector data(size); auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); auto runtime = runtime_wrapper.GetInstance(); auto allocator = runtime->GetAllocator(); - auto origin_data = tensor->data_c(); runtime->SyncCommandQueue(); - allocator->MapBuffer(origin_data, CL_MAP_READ, nullptr, true); + allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true); + if (mem_type == OpenCLMemType::BUF) { + memcpy(data.data(), tensor->data_c(), img_info.OriginSize); + } else { + auto row_size = img_info.width * img_info.FLT4_size; + for (int i = 0; i < img_info.height; ++i) { + memcpy(reinterpret_cast(data.data()) + i * row_size, + static_cast(tensor->data_c()) + i * img_info.row_pitch, row_size); + } + } + allocator->UnmapBuffer(tensor->data_c()); + printf("shape=("); auto shape = tensor->shape(); - auto N = shape.size() > 0 ? shape[0] : 1; - auto H = shape.size() > 1 ? shape[1] : 1; - auto W = shape.size() > 2 ? shape[2] : 1; - auto C = shape.size() > 3 ? shape[3] : 1; - auto SLICES = UP_DIV(C, C4NUM); - auto ElementsC4Num = N * H * W * UP_ROUND(C, C4NUM); - auto alignment = runtime->GetImagePitchAlignment(); - auto FLT4_size = tensor->data_type() == kNumberTypeFloat16 ? 
sizeof(cl_half4) : sizeof(cl_float4); - auto row_pitch = (W * SLICES + alignment - 1) / alignment * alignment * FLT4_size; - auto row_size = W * SLICES * FLT4_size; - std::vector data(N * H * row_size); - for (int i = 0; i < N * H; ++i) { - memcpy(static_cast(data.data()) + i * row_size, static_cast(origin_data) + i * row_pitch, row_size); + for (int i = 0; i < shape.size(); ++i) { + printf("%4d", shape[i]); + if (i + 1 < shape.size()) { + printf(","); + } } + printf(") "); - std::cout << "shape=("; - for (auto x : shape) { - printf("%3d,", x); - } - printf("): "); - for (size_t i = 0; i < num && i < ElementsC4Num; ++i) { - if (tensor->data_type() == kNumberTypeFloat16) - printf("%zu %6.3f | ", i, (reinterpret_cast(data.data()))[i]); - else - printf("%zu %6.3f | ", i, (reinterpret_cast(data.data()))[i]); + auto num = mem_type == OpenCLMemType::BUF ? img_info.ElementsNum : img_info.ElementsC4Num; + for (int i = 0; i < n && i < num; ++i) { + if (tensor->data_type() == kNumberTypeFloat16) { + printf("%d %7.3f | ", i, reinterpret_cast(data.data())[i]); + } else { + printf("%d %7.3f | ", i, reinterpret_cast(data.data())[i]); + } } printf("\n"); if (!out_file.empty()) { - Write2File(data.data(), out_file, data.size()); + WriteToBin(out_file, data.data(), data.size()); + } +} + +void PrintKernelOutput(OpenCLKernel *kernel, int n, const std::string &out_file) { + printf("%-30s", kernel->name().c_str()); + if (!kernel->out_tensors().empty()) { + PrintTensor(kernel->out_tensors()[0], kernel->GetMemType(), n, out_file); } - allocator->UnmapBuffer(origin_data); } std::vector GetNHWCShape(const std::vector &tensor_shape) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.h b/mindspore/lite/src/runtime/kernel/opencl/utils.h index 03931ffb56..9e5808d816 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.h +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.h @@ -24,6 +24,8 @@ #include "nnacl/op_base.h" #include "src/lite_kernel.h" #include 
"src/common/utils.h" +#include "src/runtime/opencl/opencl_runtime.h" +#include "src/runtime/kernel/opencl/opencl_kernel.h" namespace mindspore::lite { kernel::LiteKernel *GetOpenCLKernel(const std::vector &in_tensors, const std::vector &out_tensors, @@ -44,9 +46,11 @@ std::vector GetCommonLocalSize(const std::vector &global, int ma std::string CLErrorCode(cl_int error_code); -void Write2File(void *mem, const std::string &file_name, int size); +int WriteToBin(const std::string &file_path, void *data, size_t size); -void PrintTensor(lite::Tensor *tensor, int num = 10, const std::string &out_file = ""); +void PrintTensor(const lite::Tensor *tensor, OpenCLMemType mem_type, int n = 10, const std::string &out_file = ""); + +void PrintKernelOutput(OpenCLKernel *kernel, int n = 10, const std::string &out_file = ""); std::vector GetNHWCShape(const std::vector &tensor_shape); diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.cc b/mindspore/lite/src/runtime/opencl/opencl_executor.cc index 57d4f508b0..50dc1ba7f4 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.cc @@ -37,7 +37,7 @@ int OpenCLExecutor::Run(std::vector &inputs, std::vector &ou MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name(); } } - kernel::OpenCLKernel *op_kernel = reinterpret_cast(kernel); + auto *op_kernel = reinterpret_cast(kernel); auto cur_outputs = kernel->out_tensors(); for (auto i = 0; i < cur_outputs.size(); ++i) { auto *output = cur_outputs.at(i); diff --git a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc index f6bb741260..7d3594c01c 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc @@ -23,7 +23,6 @@ #include #include #include "src/common/log_adapter.h" -#include "src/runtime/opencl/opencl_runtime.h" namespace mindspore::lite::opencl { diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc index 135a6f7611..4f11b2e94e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc @@ -124,7 +124,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { param->type_ = ActivationType_RELU; auto *kernel = new (std::nothrow) kernel::ActivationOpenClKernel(reinterpret_cast(param), inputs, outputs); - kernel->SetFormatType(op_format); if (kernel == nullptr) { MS_LOG(ERROR) << "Kernel:Relu create fail."; delete param; @@ -241,7 +240,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { delete output_tensor; return; } - kernel->SetFormatType(op_format); auto ret = kernel->Init(); if (ret != RET_OK) { delete param; @@ -352,7 +350,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { delete output_tensor; return; } - kernel->SetFormatType(op_format); auto ret = kernel->Init(); if (ret != RET_OK) { delete param; @@ -464,7 +461,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { delete output_tensor; return; } - kernel->SetFormatType(op_format); auto ret = kernel->Init(); if (ret != RET_OK) { delete param; @@ -573,7 +569,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { delete output_tensor; return; } - kernel->SetFormatType(op_format); auto ret = kernel->Init(); if (ret != RET_OK) { delete param; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc index c4bd688813..c06909743e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc @@ -96,7 +96,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { delete param; return; } - arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4); 
arithmeticself_kernel->Init(); // to do allocate memory for inputs and outputs for (auto &input_tensor : inputs) { @@ -185,7 +184,6 @@ TEST_F(TestArithmeticSelfOpenCLCI, ArithmeticSelfRound) { delete param; return; } - arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4); arithmeticself_kernel->Init(); // to do allocate memory for inputs and outputs for (auto &input_tensor : inputs) { @@ -280,7 +278,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfdim2Fp16) { delete param; return; } - arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4); arithmeticself_kernel->Init(); // to do allocate memory for inputs and outputs for (auto &input_tensor : inputs) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc index d9cc475a76..2755067fa8 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc @@ -142,7 +142,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { delete param; return; } - biasadd_kernel->SetFormatType(op_format); auto ret = biasadd_kernel->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "biasadd kernel init error."; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/cast_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/cast_tests.cc index c5b3300590..bb92f4b4a3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/cast_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/cast_tests.cc @@ -84,7 +84,6 @@ TEST_F(TestCastSelfOpenCL, Castfp32tofp16) { delete param; return; } - cast_kernel->SetFormatType(schema::Format_NC4HW4); cast_kernel->Init(); // to do allocate memory for inputs and outputs for (auto &input_tensor : inputs) { @@ -171,7 +170,6 @@ TEST_F(TestCastSelfOpenCL, Castfp16tofp32) { delete param; return; } - cast_kernel->SetFormatType(schema::Format_NC4HW4); cast_kernel->Init(); // to do allocate memory for 
inputs and outputs for (auto &input_tensor : inputs) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc index 06b17f0d61..758a3762cc 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc @@ -103,7 +103,6 @@ TEST_F(TestConcatOpenCLCI, ConcatFp32_2inputforCI) { delete param; return; } - concat_kernel->SetFormatType(schema::Format_NC4HW4); concat_kernel->Init(); // to do allocate memory for inputs for (auto &input_tensor : inputs) { @@ -220,7 +219,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_4input_dim4_axis1) { delete param; return; } - concat_kernel->SetFormatType(schema::Format_NC4HW4); concat_kernel->Init(); // to do allocate memory for inputs and outputs for (auto &input_tensor : inputs) { @@ -344,7 +342,6 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_3input_dim4_axis1) { delete param; return; } - concat_kernel->SetFormatType(schema::Format_NC4HW4); concat_kernel->Init(); // to do allocate memory for inputs for (auto &input_tensor : inputs) { @@ -473,7 +470,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_6input_dim4_axis1) { delete param; return; } - concat_kernel->SetFormatType(schema::Format_NC4HW4); concat_kernel->Init(); // to do allocate memory for inputs and outputs for (auto &input_tensor : inputs) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc index 66f90d6f93..67185d8718 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc @@ -170,7 +170,6 @@ void TEST_MAIN(const std::string &attr, Format input_format, Format output_forma std::vector inputs{&input, &weight, &bias}; std::vector outputs{&output}; auto kernel = std::make_unique(reinterpret_cast(param), inputs, 
outputs); - kernel->SetFormatType(op_format); kernel->Init(); MS_LOG(DEBUG) << "create SubGraph"; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc index f9b92731ef..b952c5fe67 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc @@ -101,7 +101,6 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat delete[] packed_input; return; } - pKernel->SetFormatType(format); pKernel->Init(); std::vector kernels{pKernel.release()}; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc index 7e68dbc36f..0eb9a75d90 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc @@ -55,7 +55,6 @@ void test_main_gather(void *input_data, void *correct_data, const std::vectorSetFormatType(schema::Format_NC4HW4); pkernel->Init(); // to do allocate memory for inputs and outputs diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/pad_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/pad_tests.cc index 12f3d95f09..481839bb14 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/pad_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/pad_tests.cc @@ -60,7 +60,6 @@ void TEST_MAIN(PadParameter *param, Format input_format, Format output_format, F if (kernel == nullptr) { return; } - kernel->SetFormatType(op_format); kernel->Init(); MS_LOG(DEBUG) << "create SubGraph"; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc index 97a7db6dba..6b99182280 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc +++ 
b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc @@ -141,7 +141,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { delete param; return; } - prelu_kernel->SetFormatType(op_format); auto ret = prelu_kernel->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init prelu kernel error";