!3845 add image2d for opencl depthwise op and testcase

Merge pull request !3845 from wandongdong/master
pull/3845/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit ccb80694fd

@ -21,9 +21,12 @@
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/arm/fp32/convolution_depthwise.h"
#include "src/runtime/kernel/arm/opclib/pack.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/fp16/depthwise_conv2d.cl.inc"
#include "src/runtime/kernel/opencl/cl/fp32/depthwise_conv2d.cl.inc"
#endif
using mindspore::kernel::KERNEL_ARCH::kGPU;
@ -35,20 +38,31 @@ namespace mindspore::kernel {
int DepthwiseConv2dOpenCLKernel::Init() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::string kernel_name = "DepthwiseConv2d_NHWC4";
std::string kernel_name = "DepthwiseConv2d";
auto in_format = inputs_[0]->GetFormat();
outputs_[0]->SetFormat(in_format);
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") " << "format not support!";
}
if (mem_type_ == MEM_TYPE::BUF) {
kernel_name += "_BUF";
} else {
kernel_name += "_IMG";
}
if (in_format == schema::Format_NC4HW4) {
kernel_name = "DepthwiseConv2d_NC4HW4";
kernel_name += "_NC4HW4";
} else if (in_format == schema::Format_NHWC4) {
kernel_name += "_NHWC4";
}
auto parameter = reinterpret_cast<ConvParameter *>(opParameter);
if (parameter->kernel_h_ == 1) {
kernel_name += "_1x1";
}
#ifdef PROGRAM_WITH_IL
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
#else
std::string program_name = "DepthwiseConv2d";
std::set<std::string> build_options;
std::set <std::string> build_options;
#ifdef ENABLE_FP16
std::string source = depthwise_conv2d_source_fp16;
#else
@ -61,8 +75,9 @@ int DepthwiseConv2dOpenCLKernel::Init() {
MS_LOG(DEBUG) << kernel_name << " Init Done!";
return 0;
}
int DepthwiseConv2dOpenCLKernel::InitBuffer() {
auto parameter = reinterpret_cast<ConvParameter*>(opParameter);
auto parameter = reinterpret_cast<ConvParameter *>(opParameter);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator = ocl_runtime->GetAllocator();
@ -89,54 +104,101 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() {
size_t up_co_size = C4NUM * CO4 * sizeof(FLOAT_t);
memset_s(bias_data_, up_co_size, 0, up_co_size);
auto ori_bias = reinterpret_cast<FLOAT_t *>(inputs_.at(kBiasIndex)->Data());
memcpy_s(bias_data_, outputs_[0]->Channel() * sizeof(FLOAT_t), ori_bias, outputs_[0]->Channel() * sizeof(FLOAT_t));
memcpy_s(bias_data_, outputs_[0]->Channel() * sizeof(FLOAT_t), ori_bias,
outputs_[0]->Channel() * sizeof(FLOAT_t));
allocator->UnmapBuffer(bias_data_);
} else {
MS_ASSERT(inputs_.size() == kInputSize1);
}
return 0;
}
int DepthwiseConv2dOpenCLKernel::ReSize() {
return 0;
}
int DepthwiseConv2dOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->Name() << " Running!";
auto parameter = reinterpret_cast<ConvParameter*>(opParameter);
auto parameter = reinterpret_cast<ConvParameter *>(opParameter);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
size_t CO4 = UP_DIV(outputs_[0]->Channel(), C4NUM);
size_t CI4 = UP_DIV(inputs_[0]->Channel(), C4NUM);
std::vector<size_t> global = {(size_t)outputs_[0]->Width(), (size_t)outputs_[0]->Height(), CO4};
std::vector<size_t> local = {1, 1, 1};
std::vector <size_t> global = {(size_t) outputs_[0]->Width(), (size_t) outputs_[0]->Height(), CO4};
std::vector <size_t> local = {1, 1, CO4};
float relu_clip1 = 6.0;
cl_int2 kernel_size = {parameter->kernel_h_, parameter->kernel_w_};
cl_int2 stride = {parameter->stride_h_, parameter->stride_w_};
cl_int2 padding = {-parameter->pad_h_, -parameter->pad_w_};
cl_int2 dilation = {parameter->dilation_h_, parameter->dilation_w_};
cl_int4 src_size = {inputs_[0]->Width(), inputs_[0]->Height(), (cl_int)CI4, inputs_[0]->Batch()};
cl_int4 dst_size = {(cl_int)outputs_[0]->Width(), (cl_int)outputs_[0]->Height(), (cl_int)CO4,
(cl_int)outputs_[0]->Batch()};
ocl_runtime->SetKernelArg(kernel_, 0, inputs_[0]->Data());
cl_int4 src_size = {inputs_[0]->Width(), inputs_[0]->Height(), (cl_int) CI4, inputs_[0]->Batch()};
cl_int4 dst_size = {(cl_int) outputs_[0]->Width(), (cl_int) outputs_[0]->Height(), (cl_int) CO4,
(cl_int) outputs_[0]->Batch()};
ocl_runtime->SetKernelArg(kernel_, 1, packed_weight_);
ocl_runtime->SetKernelArg(kernel_, 2, bias_data_);
ocl_runtime->SetKernelArg(kernel_, 3, relu_clip1);
ocl_runtime->SetKernelArg(kernel_, 4, outputs_[0]->Data());
ocl_runtime->SetKernelArg(kernel_, 5, kernel_size);
ocl_runtime->SetKernelArg(kernel_, 6, stride);
ocl_runtime->SetKernelArg(kernel_, 7, padding);
ocl_runtime->SetKernelArg(kernel_, 8, dilation);
ocl_runtime->SetKernelArg(kernel_, 9, src_size);
ocl_runtime->SetKernelArg(kernel_, 10, dst_size);
if (mem_type_ == MEM_TYPE::BUF) {
ocl_runtime->SetKernelArg(kernel_, 0, inputs_[0]->Data());
ocl_runtime->SetKernelArg(kernel_, 4, outputs_[0]->Data());
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
} else {
cl::ImageFormat image_format;
{
image_format.image_channel_order = CL_RGBA;
image_format.image_channel_data_type = CL_FLOAT;
}
cl_int in_error_code;
size_t im_src_x, im_src_y;
size_t im_dst_x, im_dst_y;
if (inputs_[0]->GetFormat() == schema::Format_NHWC4) {
im_src_x = inputs_[0]->Width() * CI4;
im_src_y = inputs_[0]->Height();
im_dst_x = outputs_[0]->Width() * CO4;
im_dst_y = outputs_[0]->Height();
} else {
im_src_y = inputs_[0]->Height() * CI4;
im_src_x = inputs_[0]->Width();
im_dst_y = outputs_[0]->Height() * CO4;
im_dst_x = outputs_[0]->Width();
}
cl::Image2D in_mem(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image_format,
im_src_x, im_src_y, 0, inputs_[0]->Data(), &in_error_code);
cl_int out_error_code;
cl::Image2D out_mem(*ocl_runtime->Context(), CL_MEM_WRITE_ONLY, image_format,
im_dst_x, im_dst_y, 0, nullptr, &out_error_code);
if (in_error_code != CL_SUCCESS) {
MS_LOG(DEBUG) << "in Image2D Failed, error=" << in_error_code;
return 1;
}
if (out_error_code != CL_SUCCESS) {
MS_LOG(DEBUG) << "out Image2D Failed, error= " << out_error_code;
return 1;
}
auto origin = cl::array < cl::size_type,
3U > {0, 0, 0};
auto region = cl::array < cl::size_type,
3U > {im_dst_x, im_dst_y, 1};
ocl_runtime->SetKernelArg(kernel_, 0, in_mem);
ocl_runtime->SetKernelArg(kernel_, 4, out_mem);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime->GetDefaultCommandQueue()->enqueueReadImage(out_mem, CL_TRUE, origin, region, 0, 0,
outputs_[0]->Data());
}
return 0;
}
kernel::LiteKernel *OpenCLDepthwiseConv2dKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
auto *kernel = new DepthwiseConv2dOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
auto ret = kernel->Init();
if (0 != ret) {
@ -147,6 +209,7 @@ kernel::LiteKernel *OpenCLDepthwiseConv2dKernelCreator(const std::vector<lite::t
return kernel;
}
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_DepthwiseConv2D, OpenCLDepthwiseConv2dKernelCreator)
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_DepthwiseConv2D, OpenCLDepthwiseConv2dKernelCreator
)
} // namespace mindspore::kernel

@ -18,12 +18,10 @@
#define MINDSPORE_LITE_SRC_BACKEND_OPENCL_DEPTHWISE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/kernel/arm/opclib/conv_parameter.h"
#include "src/runtime/opencl/opencl_runtime.h"
namespace mindspore::kernel {
class DepthwiseConv2dOpenCLKernel : public LiteKernel {
@ -31,18 +29,25 @@ class DepthwiseConv2dOpenCLKernel : public LiteKernel {
explicit DepthwiseConv2dOpenCLKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs),
packed_weight_(nullptr), bias_data_(nullptr), kernel_(nullptr) {}
packed_weight_(nullptr), bias_data_(nullptr), kernel_(nullptr) {}
~DepthwiseConv2dOpenCLKernel() override {};
int Init() override;
int ReSize() override;
int Run() override;
int InitBuffer();
private:
FLOAT_t *packed_weight_;
FLOAT_t *bias_data_;
cl::Kernel kernel_;
enum class MEM_TYPE {
BUF, IMG
} mem_type_{MEM_TYPE::BUF};
};
} // namespace mindspore::kernel

@ -264,6 +264,8 @@ if (SUPPORT_GPU)
set(TEST_SRC
${TEST_SRC}
${TEST_DIR}/ut/stc/runtime/kernel/opencl/matmul_tests.cc
${TEST_DIR}/ut/stc/runtime/kernel/opencl/depthwise_conv2d_tests.cc
${TEST_DIR}/ut/stc/runtime/kernel/opencl/concat_tests.cc
${TEST_DIR}/ut/stc/runtime/kernel/opencl/softmax_cl_tests.cc
)
endif()

Loading…
Cancel
Save