From 6e04865d944e6e45c16d6d4369dde9f60b88d3a3 Mon Sep 17 00:00:00 2001
From: Pengyongrong
Date: Fri, 30 Oct 2020 23:53:25 -0700
Subject: [PATCH] Fuse hswish into activation

---
 .../runtime/kernel/opencl/cl/activation.cl    | 13 +++
 .../kernel/opencl/kernel/activation.cc        |  4 +-
 mindspore/lite/test/models_fp32_gpu.cfg       |  1 +
 .../runtime/kernel/opencl/activation_tests.cc | 81 ++++++++++++++++++-
 4 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl
index 214d89b232..2d30bb02af 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl
@@ -78,3 +78,16 @@ __kernel void Swish(__read_only image2d_t input, __write_only image2d_t output,
   in_c4 = in_c4 * ((FLT4)(1.f) / ((FLT4)(1.f) + exp(-in_c4)));
   WRITE_IMAGE(output, (int2)(X, Y), in_c4);
 }
+
+__kernel void HSwish(__read_only image2d_t input, __write_only image2d_t output, const int2 img_shape) {
+  int X = get_global_id(0);  // w*c
+  int Y = get_global_id(1);  // n*h
+  if (X >= img_shape.x || Y >= img_shape.y) return;
+  FLT4 temp = READ_IMAGE(input, smp_zero, (int2)(X, Y));
+  FLT4 result = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);
+  result.x = temp.x * (temp.x <= -3 ? 0 : (temp.x >= 3 ? 1 : temp.x / 6 + 0.5f));
+  result.y = temp.y * (temp.y <= -3 ? 0 : (temp.y >= 3 ? 1 : temp.y / 6 + 0.5f));
+  result.z = temp.z * (temp.z <= -3 ? 0 : (temp.z >= 3 ? 1 : temp.z / 6 + 0.5f));
+  result.w = temp.w * (temp.w <= -3 ? 0 : (temp.w >= 3 ? 1 : temp.w / 6 + 0.5f));
+  WRITE_IMAGE(output, (int2)(X, Y), result);
+}
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
index fd3b6a4a2b..5701ab1da5 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
@@ -31,6 +31,7 @@ using mindspore::kernel::KERNEL_ARCH::kGPU;
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
+using mindspore::schema::ActivationType_HSWISH;
 using mindspore::schema::ActivationType_LEAKY_RELU;
 using mindspore::schema::ActivationType_RELU;
 using mindspore::schema::ActivationType_RELU6;
@@ -44,7 +45,8 @@ namespace mindspore::kernel {
 int ActivationOpenClKernel::Init() {
   std::map<int, std::string> kernel_names{
     {ActivationType_LEAKY_RELU, "LeakyRelu"}, {ActivationType_RELU, "Relu"}, {ActivationType_SIGMOID, "Sigmoid"},
-    {ActivationType_RELU6, "Relu6"}, {ActivationType_TANH, "Tanh"}, {ActivationType_SWISH, "Swish"}};
+    {ActivationType_RELU6, "Relu6"}, {ActivationType_TANH, "Tanh"}, {ActivationType_SWISH, "Swish"},
+    {ActivationType_HSWISH, "HSwish"}};
   if (kernel_names.count(type_) == 0) {
     MS_LOG(ERROR) << "schema::ActivationType:" << type_ << "not found";
     return mindspore::lite::RET_ERROR;
diff --git a/mindspore/lite/test/models_fp32_gpu.cfg b/mindspore/lite/test/models_fp32_gpu.cfg
index 545506c7d3..9123df61a8 100644
--- a/mindspore/lite/test/models_fp32_gpu.cfg
+++ b/mindspore/lite/test/models_fp32_gpu.cfg
@@ -23,3 +23,4 @@ landmark
 PoseNet_dla_17_x512
 age_new
 plat_isface
+efficientnet.mindir
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
index 270a745211..41265efc59 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
@@ -29,6 +29,7 @@ using mindspore::kernel::SubGraphOpenCLKernel;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 using mindspore::lite::Tensor;
+using mindspore::schema::ActivationType_HSWISH;
 using mindspore::schema::ActivationType_LEAKY_RELU;
 using mindspore::schema::ActivationType_RELU;
 using mindspore::schema::ActivationType_RELU6;
@@ -39,6 +40,7 @@ using mindspore::schema::PrimitiveType_Activation;
 
 namespace mindspore {
 class TestActivationOpenCL : public mindspore::CommonTest {};
+
 class TestActivationOpenCLTanh : public mindspore::CommonTest {};
 
 void LoadActivationData(void *dst, size_t dst_size, const std::string &file_path) {
@@ -622,7 +624,7 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
   delete sub_graph;
 }
 
-TEST_F(TestActivationOpenCL, SwishFp_dim4) {
+TEST_F(TestActivationOpenCL, SwishFp16_dim4) {
   size_t input_size;
   std::string in_file = "/data/local/tmp/test_data/in_swishfp16.bin";
   std::string out_file = "/data/local/tmp/test_data/out_swishfp16.bin";
@@ -703,4 +705,81 @@
   CompareRes(&output_tensor, out_file);
   delete sub_graph;
 }
+
+TEST_F(TestActivationOpenCL, HSwishFp16_dim4) {
+  MS_LOG(INFO) << " begin test ";
+  auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
+  auto runtime = runtime_wrapper.GetInstance();
+  runtime->Init();
+  auto allocator = runtime->GetAllocator();
+
+  std::vector<int> input_shape = {1, 1, 2, 4};
+  std::vector<int> output_shape = {1, 1, 2, 4};
+  auto data_type = kNumberTypeFloat32;
+
+  auto tensor_type = lite::Tensor::CONST_TENSOR;
+  schema::Format format = schema::Format_NHWC;
+  float input_data[] = {-3.0, -2.0, -1.0, 0.0, 1.0, 5.0, 6.0, 7.0};
+  float correctOutput[] = {-0, -0.33333334, -0.33333334, 0, 0.6666667, 5, 6, 7};
+
+  MS_LOG(INFO) << "Init tensors.";
+  auto output_tensor = Tensor(data_type, input_shape, format, tensor_type);
+  auto in_tensor = Tensor(data_type, output_shape, format, tensor_type);
+  std::vector<lite::Tensor *> inputs{&in_tensor};
+  std::vector<lite::Tensor *> outputs{&output_tensor};
+  runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
+
+  MS_LOG(INFO) << "Initialize input data";
+  auto param = reinterpret_cast<ActivationParameter *>(malloc(sizeof(ActivationParameter)));
+  if (param == nullptr) {
+    MS_LOG(ERROR) << "New ActivationParameter fail.";
+    return;
+  }
+  param->type_ = ActivationType_HSWISH;
+  auto *kernel =
+    new (std::nothrow) kernel::ActivationOpenClKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "Kernel:HSwish create fail.";
+    delete param;
+    return;
+  }
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    delete param;
+    delete kernel;
+    MS_LOG(ERROR) << "Init HSwish fail.";
+    return;
+  }
+  inputs[0]->MallocData(allocator);
+  memcpy(inputs[0]->data_c(), input_data, sizeof(input_data));
+  MS_LOG(INFO) << "Create kernel SubGraphOpenCLKernel.";
+  std::vector<kernel::LiteKernel *> kernels{kernel};
+  auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
+  if (sub_graph == nullptr) {
+    delete kernel;
+    delete param;
+    MS_LOG(ERROR) << "Kernel SubGraphOpenCLKernel create fail.";
+    return;
+  }
+
+  MS_LOG(INFO) << "Initialize sub_graph.";
+  ret = sub_graph->Init();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init sub_graph error.";
+    delete sub_graph;
+    return;
+  }
+
+  MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
+  ret = sub_graph->Run();
+  if (ret != RET_OK) {
+    delete param;
+    delete sub_graph;
+    MS_LOG(ERROR) << "Run SubGraphOpenCLKernel error.";
return; + } + auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); + CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001); + delete sub_graph; +} } // namespace mindspore