From c659f35265e369d48aa1d9101ff1ea9e3952a03c Mon Sep 17 00:00:00 2001
From: liuzhongkai
Date: Wed, 2 Sep 2020 01:23:17 -0700
Subject: [PATCH] activation support NC4HW4 in opencl

---
 .../src/runtime/kernel/opencl/cl/biasadd.cl   | 32 +++++-----
 .../src/runtime/kernel/opencl/cl/prelu.cl     | 49 ++++++++------
 .../kernel/opencl/kernel/activation.cc        | 20 +++---
 .../runtime/kernel/opencl/kernel/biasadd.cc   | 58 +++++++++----------
 .../runtime/kernel/opencl/kernel/biasadd.h    |  3 +-
 .../src/runtime/kernel/opencl/kernel/prelu.cc | 48 +++++++--------
 .../src/runtime/kernel/opencl/kernel/prelu.h  |  2 +
 .../runtime/kernel/opencl/activation_tests.cc | 38 ++++++------
 .../runtime/kernel/opencl/biasadd_tests.cc    | 14 ++---
 .../src/runtime/kernel/opencl/prelu_tests.cc  |  8 ++-
 10 files changed, 140 insertions(+), 132 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/biasadd.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/biasadd.cl
index df5eac49d3..0000878e1d 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/biasadd.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/biasadd.cl
@@ -4,20 +4,26 @@
 __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

 __kernel void BiasAdd(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape,
-                      __read_only image2d_t alpha, const int dim) {
-  int C = input_shape.w;  // channel size
+                      __read_only image2d_t alpha, const int data_type) {
+  int H = input_shape.y;
+  int C = input_shape.w;  // channel size
+  C = UP_DIV(C, C4NUM);
+  if ((C == 0 || H == 0) && data_type != 1) {
+    return;
+  }
   int Y = get_global_id(0);  // height id
   int X = get_global_id(1);  // weight id
-  for (int num = 0; num < UP_DIV(C, C4NUM); ++num) {
-    FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X * UP_DIV(C, C4NUM) + num, Y));  // NHWC4: H WC
-    FLT4 tmp = in_c4;
-    int index = 0;
-    if (dim == 2) {
-      index = X;
-    } else {
-      index = num;
-    }
-    tmp += READ_IMAGE(alpha, smp_zero, (int2)(index, 0));
-    WRITE_IMAGE(output, (int2)(X * UP_DIV(C, C4NUM) + num, Y), tmp);  // NHWC4: H WC
+
+  FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X, Y));
+  FLT4 tmp = in_c4;
+  int index = 0;
+  if (data_type == 1) {  // NC
+    index = X;
+  } else if (data_type == 2) {  // NHWC4
+    index = X % C;
+  } else {  // NC4HW4
+    index = Y / H;
   }
+  tmp += READ_IMAGE(alpha, smp_zero, (int2)(index, 0));
+  WRITE_IMAGE(output, (int2)(X, Y), tmp);
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl
index 608c232caa..df0bcd75e1 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/prelu.cl
@@ -4,27 +4,38 @@
 __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

 __kernel void PRelu(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape,
-                    __read_only image2d_t alpha, const int dim) {
+                    __read_only image2d_t alpha, const int data_type, const int bias_dim) {
+  int H = input_shape.y;
   int C = input_shape.w;  // channel size
-
+  C = UP_DIV(C, SLICES);
+  if (C == 0 || H == 0) {
+    return;
+  }
   int Y = get_global_id(0);  // height id
   int X = get_global_id(1);  // weight id
-  for (int num = 0; num < UP_DIV(C, SLICES); ++num) {
-    FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X * UP_DIV(C, SLICES) + num, Y));  // NHWC4: H WC
-    FLT4 tmp;
-    if (dim == 1) {
-      FLT4 weight = READ_IMAGE(alpha, smp_zero, (int2)(0, 0));
-      tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * weight.x;
-      tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * weight.x;
-      tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * weight.x;
-      tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * weight.x;
-    } else {
-      FLT4 weight = READ_IMAGE(alpha, smp_zero, (int2)(num, 0));
-      tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * weight.x;
-      tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * weight.y;
-      tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * weight.z;
-      tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * weight.w;
-    }
-    WRITE_IMAGE(output, (int2)(X * UP_DIV(C, SLICES) + num, Y), tmp);  // NHWC4: H WC
+  FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X, Y));
+  FLT4 tmp;
+  int index = 0;
+  if (data_type == 1) {  // NHWC4
+    index = X % C;
+  } else if (data_type == 2) {  // NC4HW4
+    index = Y / H;
+  } else {
+    return;
+  }
+  if (bias_dim == 1) {
+    index = 0;
+  }
+  FLT4 weight = READ_IMAGE(alpha, smp_zero, (int2)(index, 0));
+  FLT4 bias = weight;
+  if (bias_dim == 1) {
+    bias.y = weight.x;
+    bias.z = weight.x;
+    bias.w = weight.x;
   }
+  tmp.x = in_c4.x > 0.0f ? in_c4.x : in_c4.x * bias.x;
+  tmp.y = in_c4.y > 0.0f ? in_c4.y : in_c4.y * bias.y;
+  tmp.z = in_c4.z > 0.0f ? in_c4.z : in_c4.z * bias.z;
+  tmp.w = in_c4.w > 0.0f ? in_c4.w : in_c4.w * bias.w;
+  WRITE_IMAGE(output, (int2)(X, Y), tmp);
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
index 57fa9e31d7..d38b43da44 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
@@ -77,20 +77,10 @@ int ActivationOpenClKernel::Init() {
   std::set<std::string> build_options;
   ocl_runtime->LoadSource(Program_Kernel[type_][0], source);
   ocl_runtime->BuildKernel(kernel_, Program_Kernel[type_][0], Program_Kernel[type_][1], build_options);
-
-  std::map<int, schema::Format> format{{4, schema::Format_NHWC4}, {2, schema::Format_NC4}};
-  if (format.count(out_size_) == 0) {
-    MS_LOG(ERROR) << "Not found output tensor format";
-    return RET_ERROR;
-  }
   in_ori_format_ = in_tensors_[0]->GetFormat();
   out_ori_format_ = out_tensors_[0]->GetFormat();
-  in_tensors_[0]->SetFormat(format[in_size_]);
-  out_tensors_[0]->SetFormat(format[out_size_]);
-  if (in_size_ == 2) {
-    in_ori_format_ = schema::Format_NC4;
-    out_ori_format_ = schema::Format_NC4;
-  }
+  in_tensors_[0]->SetFormat(op_format_);
+  out_tensors_[0]->SetFormat(op_format_);
   MS_LOG(DEBUG) << op_parameter_->name_ << " init Done!";
   return RET_OK;
 }
@@ -121,11 +111,15 @@ cl_int4 ActivationOpenClKernel::GetImg2dShape() {
   cl_int4 img2d_shape = {0, 0, 0, 0};
   for (int i = 0; i < in_size_; ++i) {
     img2d_shape.s[i + 4 - in_size_] = in_tensors_[0]->shape()[i];
   }
-  if (in_size_ == 2) {
+  if (op_format_ == schema::Format_NC4) {
     img2d_shape.s[1] = img2d_shape.s[2];
     img2d_shape.s[2] = UP_DIV(img2d_shape.s[3], C4NUM);
     img2d_shape.s[3] = C4NUM;
   }
+  if (op_format_ == schema::Format_NC4HW4) {
+    img2d_shape.s[1] = UP_DIV(img2d_shape.s[3], C4NUM) * img2d_shape.s[1];  // UP(c / 4) * H
+    img2d_shape.s[3] = C4NUM;
+  }
   return img2d_shape;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc
index 89153e5aef..8c7e8d766c 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc
@@ -54,6 +54,9 @@ void BiasAddOpenCLKernel::InitBuffer() {
 int BiasAddOpenCLKernel::Init() {
   in_size_ = in_tensors_[0]->shape().size();
   out_size_ = out_tensors_[0]->shape().size();
+  for (int i = 0; i < in_size_; ++i) {
+    input_shape_.s[i + 4 - in_size_] = in_tensors_[0]->shape()[i];
+  }
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   enable_fp16_ = ocl_runtime->GetFp16Enable();
   fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
@@ -77,33 +80,26 @@ int BiasAddOpenCLKernel::Init() {
   in_ori_format_ = in_tensors_[0]->GetFormat();
   out_ori_format_ = out_tensors_[0]->GetFormat();
-  std::map<int, schema::Format> format{{4, schema::Format_NHWC4}, {2, schema::Format_NC4}};
-  if (format.count(out_size_) == 0) {
-    MS_LOG(ERROR) << "Not found output tensor format";
-    return RET_ERROR;
-  }
-  in_tensors_[0]->SetFormat(format[in_size_]);
-  out_tensors_[0]->SetFormat(format[out_size_]);
-  if (in_size_ == 2) {
-    in_ori_format_ = format[in_size_];
-    out_ori_format_ = format[out_size_];
-  }
+  in_tensors_[0]->SetFormat(op_format_);
+  out_tensors_[0]->SetFormat(op_format_);
   MS_LOG(DEBUG) << program_name << " Init Done!";
   return RET_OK;
 }

 int BiasAddOpenCLKernel::Run() {
-  cl_int4 input_shape = GetImg2dShape();
+  cl_int4 global_size = GetGlobalshape();
   MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   int arg_idx = 0;
+  std::map<schema::Format, int> data_type{
+    {schema::Format_NC4, 1}, {schema::Format_NHWC4, 2}, {schema::Format_NC4HW4, 3}};
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
-  ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape);
+  ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_);
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, BiasAdd_);
-  ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_size_);
+  ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
   std::vector<size_t> local = {1, 1};
-  std::vector<size_t> global = {static_cast<size_t>(input_shape.s[1]), static_cast<size_t>(input_shape.s[2])};
+  std::vector<size_t> global = {static_cast<size_t>(global_size.s[1]), static_cast<size_t>(global_size.s[2])};
   auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error.";
@@ -112,29 +108,29 @@ int BiasAddOpenCLKernel::Run() {
   return RET_OK;
 }

-cl_int4 BiasAddOpenCLKernel::GetImg2dShape() {
-  cl_int4 img2d_shape = {0, 0, 0, 0};
-  for (int i = 0; i < in_size_; ++i) {
-    img2d_shape.s[i + 4 - in_size_] = in_tensors_[0]->shape()[i];
+cl_int4 BiasAddOpenCLKernel::GetGlobalshape() {
+  cl_int4 global_shape = input_shape_;
+  if (op_format_ == schema::Format_NC4) {
+    global_shape.s[1] = global_shape.s[2];
+    global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM);
   }
-  if (in_size_ == 2) {
-    img2d_shape.s[1] = img2d_shape.s[2];
-    img2d_shape.s[2] = UP_DIV(img2d_shape.s[3], C4NUM);
-    img2d_shape.s[3] = C4NUM;
+  if (op_format_ == schema::Format_NC4HW4) {
+    global_shape.s[1] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[1];  // c / 4 * H
   }
-  return img2d_shape;
+  if (op_format_ == schema::Format_NHWC4) {
+    global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[2];
+  }
+  return global_shape;
 }

 int BiasAddOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
-  cl_int4 img_shape = GetImg2dShape();
-#ifdef ENABLE_FP16
-  size_t img_dtype = CL_HALF_FLOAT;
-#else
+  cl_int4 img_shape = GetGlobalshape();
   size_t img_dtype = CL_FLOAT;
-#endif
-
+  if (enable_fp16_) {
+    img_dtype = CL_HALF_FLOAT;
+  }
   img_size->clear();
-  img_size->push_back(img_shape.s[2] * UP_DIV(img_shape.s[3], C4NUM));
+  img_size->push_back(img_shape.s[2]);
   img_size->push_back(img_shape.s[1]);
   img_size->push_back(img_dtype);
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h
index b3c4ba80c2..9d95089a11 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h
@@ -38,7 +38,7 @@ class BiasAddOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int GetImageSize(size_t idx, std::vector<size_t> *img_size) override;
   void InitBuffer();
-  cl_int4 GetImg2dShape();
+  cl_int4 GetGlobalshape();

 private:
   cl::Kernel kernel_;
@@ -46,6 +46,7 @@ class BiasAddOpenCLKernel : public OpenCLKernel {
   int in_size_;
   int out_size_;
   size_t fp_size;
+  cl_int4 input_shape_;
   bool enable_fp16_{false};
 };
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
index c8385fcb2a..e5a7d35a8d 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
@@ -18,6 +18,7 @@

 #include <set>
 #include <vector>
+#include <map>
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
@@ -62,6 +63,9 @@ int PReluOpenCLKernel::Init() {
                   << C_Weight << " and your input channel size is " << C;
     return RET_ERROR;
   }
+  for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) {
+    input_shape_.s[i] = in_tensors_[0]->shape()[i];
+  }
   std::set<std::string> build_options;
   std::string source = prelu_source;
   std::string program_name = "PRelu";
@@ -73,31 +77,26 @@ int PReluOpenCLKernel::Init() {
   ocl_runtime->LoadSource(program_name, source);
   ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
   in_ori_format_ = in_tensors_[0]->GetFormat();
-  in_tensors_[0]->SetFormat(schema::Format_NHWC4);
+  in_tensors_[0]->SetFormat(op_format_);
   out_ori_format_ = out_tensors_[0]->GetFormat();
-  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
+  out_tensors_[0]->SetFormat(op_format_);
   MS_LOG(DEBUG) << program_name << " init Done!";
   return RET_OK;
 }

 int PReluOpenCLKernel::Run() {
   MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
-  int N = in_tensors_[0]->shape()[0];
-  int H = in_tensors_[0]->shape()[1];
-  int W = in_tensors_[0]->shape()[2];
-  int C = in_tensors_[0]->shape()[3];
-  cl_int4 input_shape = {N, H, W, C};
-
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
+  std::map<schema::Format, int> data_type{{schema::Format_NHWC4, 1}, {schema::Format_NC4HW4, 2}};
   int arg_idx = 0;
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
-  ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape);
+  ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_);
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, PReluWeight_);
+  ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
   ocl_runtime->SetKernelArg(kernel_, arg_idx++, reinterpret_cast(in_tensors_[1]->shape()[0]));
-  std::vector<size_t> local = {1, 1};
-  std::vector<size_t> global = {static_cast<size_t>(H), static_cast<size_t>(W)};
+  std::vector<size_t> local = {1, 1};
+  std::vector<size_t> global = {static_cast<size_t>(global_shape_.s[1]), static_cast<size_t>(global_shape_.s[2])};
   auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error.";
@@ -107,19 +106,22 @@ int PReluOpenCLKernel::Run() {
 }

 int PReluOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
-  int H = in_tensors_[0]->shape()[1];
-  int W = in_tensors_[0]->shape()[2];
-  int C = in_tensors_[0]->shape()[3];
-
-#ifdef ENABLE_FP16
-  size_t img_dtype = CL_HALF_FLOAT;
-#else
   size_t img_dtype = CL_FLOAT;
-#endif
-
+  if (enable_fp16_) {
+    img_dtype = CL_HALF_FLOAT;
+  }
+  global_shape_ = input_shape_;
+  if (op_format_ == schema::Format_NC4HW4) {
+    global_shape_.s[1] = UP_DIV(input_shape_.s[3], C4NUM) * input_shape_.s[1];
+  } else if (op_format_ == schema::Format_NHWC4) {
+    global_shape_.s[2] = UP_DIV(input_shape_.s[3], C4NUM) * input_shape_.s[2];
+  } else {
+    MS_LOG(ERROR) << "op_format_:" << op_format_ << " is not supported!";
+    return RET_ERROR;
+  }
   img_size->clear();
-  img_size->push_back(W * UP_DIV(C, C4NUM));
-  img_size->push_back(H);
+  img_size->push_back(global_shape_.s[2]);
+  img_size->push_back(global_shape_.s[1]);
   img_size->push_back(img_dtype);
   return RET_OK;
 }
@@ -128,7 +130,7 @@ kernel::LiteKernel *OpenCLPReluKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                              const std::vector<lite::tensor::Tensor *> &outputs,
                                              OpParameter *opParameter, const lite::Context *ctx,
                                              const kernel::KernelKey &desc, const lite::PrimitiveC *primitive) {
-  if (inputs.size() == 0) {
+  if (inputs.empty()) {
     MS_LOG(ERROR) << "Input data size must be greater than 0, but your size is " << inputs.size();
     return nullptr;
   }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
index d9cdb8137d..3535ba4aa4 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
@@ -41,6 +41,8 @@ class PReluOpenCLKernel : public OpenCLKernel {
 private:
   cl::Kernel kernel_;
   void *PReluWeight_;
+  cl_int4 input_shape_;
+  cl_int4 global_shape_;
   size_t fp_size;
   bool enable_fp16_{false};
 };
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
index 13608a5e2f..e95c7fa838 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
@@ -51,7 +51,7 @@ void CompareRes(lite::tensor::Tensor *output_tensor, const std::string &standard_answer_file) {
   auto *output_data = reinterpret_cast<float *>(output_tensor->Data());
   size_t output_size = output_tensor->Size();
   auto expect_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size));
-  constexpr float atol = 0.0002;
+  constexpr float atol = 0.001;
   for (int i = 0; i < output_tensor->ElementsNum(); ++i) {
     if (std::fabs(output_data[i] - expect_data[i]) > atol) {
       printf("error at idx[%d] expect=%f output=%f\n", i, expect_data[i], output_data[i]);
@@ -88,10 +88,8 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
   bool enable_fp16 = ocl_runtime->GetFp16Enable();
   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
-  schema::Format format = schema::Format_NHWC;
-  if (input_shape.size() == 2) {
-    format = schema::Format_NC;
-  }
+  schema::Format format = schema::Format_NC;
+  schema::Format op_format = schema::Format_NC4;
   auto tensor_type = schema::NodeType_ValueNode;
   auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
@@ -124,6 +122,7 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
   param->type_ = ActivationType_RELU;
   auto *kernel =
     new (std::nothrow) kernel::ActivationOpenClKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
+  kernel->SetFormatType(op_format);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "Kernel:Relu create fail.";
     delete param;
@@ -194,17 +193,15 @@
   std::string out_file = "/data/local/tmp/relu6.bin";
   MS_LOG(INFO) << "Relu6 Begin test!";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
-  auto data_type = kNumberTypeFloat32;
+  auto data_type = kNumberTypeFloat16;
   ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
   bool enable_fp16 = ocl_runtime->GetFp16Enable();
   ocl_runtime->Init();

   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
-  schema::Format format = schema::Format_NHWC;
-  if (input_shape.size() == 2) {
-    format = schema::Format_NC;
-  }
+  schema::Format format = schema::Format_NC;
+  schema::Format op_format = schema::Format_NC4;
   auto tensor_type = schema::NodeType_ValueNode;
   auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
@@ -246,6 +243,7 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
     delete output_tensor;
     return;
   }
+  kernel->SetFormatType(op_format);
   auto ret = kernel->Init();
   if (ret != RET_OK) {
     delete param;
@@ -311,16 +309,14 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
   MS_LOG(INFO) << "Sigmoid Begin test!";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->Init();
-  auto data_type = kNumberTypeFloat16;
+  auto data_type = kNumberTypeFloat32;
   ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
   bool enable_fp16 = ocl_runtime->GetFp16Enable();

   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
-  schema::Format format = schema::Format_NHWC;
-  if (input_shape.size() == 2) {
-    format = schema::Format_NC;
-  }
+  schema::Format format = schema::Format_NC;
+  schema::Format op_format = schema::Format_NC4;
   auto tensor_type = schema::NodeType_ValueNode;
   auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
@@ -362,6 +358,7 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
     delete output_tensor;
     return;
   }
+  kernel->SetFormatType(op_format);
   auto ret = kernel->Init();
   if (ret != RET_OK) {
     delete param;
@@ -427,17 +424,15 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
   MS_LOG(INFO) << "Leaky relu Begin test!";
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   ocl_runtime->Init();
-  auto data_type = kNumberTypeFloat32;
+  auto data_type = kNumberTypeFloat16;  // need modify
   ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
   bool enable_fp16 = ocl_runtime->GetFp16Enable();

   MS_LOG(INFO) << "Init tensors.";
-  std::vector<int> input_shape = {1, 9};
+  std::vector<int> input_shape = {1, 9};  // need modify
   auto tensor_type = schema::NodeType_ValueNode;
-  schema::Format format = schema::Format_NHWC;
-  if (input_shape.size() == 2) {
-    format = schema::Format_NC;
-  }
+  schema::Format format = schema::Format_NC;      // need modify
+  schema::Format op_format = schema::Format_NC4;  // need modify
   auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
     MS_LOG(ERROR) << "new input tensor error!";
@@ -479,6 +474,7 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
     delete output_tensor;
     return;
   }
+  kernel->SetFormatType(op_format);
   auto ret = kernel->Init();
   if (ret != RET_OK) {
     delete param;
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc
index d46b62e7b4..67975ed934 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc
@@ -77,20 +77,18 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
   MS_LOG(INFO) << "BiasAdd Begin test:";
"BiasAdd Begin test:"; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->Init(); - auto data_type = kNumberTypeFloat16; + auto data_type = kNumberTypeFloat16; // need modify ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); - std::vector input_shape = {1, 9}; - std::vector output_shape = {1, 9}; - + std::vector input_shape = {1, 9}; // need modify + std::vector output_shape = {1, 9}; // need modify auto tensor_type = schema::NodeType_ValueNode; - schema::Format type; + schema::Format type = schema::Format_NC; // need modify + schema::Format op_format = schema::Format_NC4; // need modify int weight_shape = 0; if (input_shape.size() == 4) { weight_shape = input_shape[3]; - type = schema::Format_NHWC; } else { weight_shape = input_shape[1]; - type = schema::Format_NC; } auto *input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, type, tensor_type); if (input_tensor == nullptr) { @@ -144,7 +142,7 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { delete param; return; } - + biasadd_kernel->SetFormatType(op_format); auto ret = biasadd_kernel->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "biasadd kernel init error."; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc index 8ee30bd194..34bfd30689 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc @@ -85,14 +85,15 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { std::vector input_shape = {1, 4, 3, 9}; auto data_type = kNumberTypeFloat16; ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); + schema::Format format = schema::Format_NHWC; + schema::Format op_format = schema::Format_NC4HW4; auto tensor_type = schema::NodeType_ValueNode; - auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); + auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type); if (input_tensor == nullptr) { MS_LOG(ERROR) << "new input_tensor error!"; return; } - auto output_tensor = - new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); + auto output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensor_type); if (output_tensor == nullptr) { MS_LOG(ERROR) << "new output_tensor error"; delete input_tensor; @@ -140,6 +141,7 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { delete param; return; } + prelu_kernel->SetFormatType(op_format); auto ret = prelu_kernel->Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init prelu kernel error";