!7357 support unaligned shape for opencl reshape op

Merge pull request !7357 from wandongdong/master
pull/7357/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 56b7562bf5

@ -1,24 +1,70 @@
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define C4NUM 4
#define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
__kernel void reshape_NHWC4(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 size, int4 size_out) {
__kernel void reshape_NHWC4(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 src_size,
int4 dst_size) {
int X = get_global_id(0);
if (X >= size_out.x * size_out.y * size_out.z * size_out.w) {
int Y = get_global_id(1);
int CO4 = UP_DIV(dst_size.z, C4NUM);
int CO4_rem = dst_size.z % C4NUM;
if (X >= dst_size.x || Y > dst_size.y) {
return;
}
int in_img_x = size.z * size.w;
int out_img_x = size_out.z * size_out.w;
WRITE_IMAGE(dst_data, (int2)(X % out_img_x, X / out_img_x),
READ_IMAGE(src_data, smp_zero, (int2)(X % in_img_x, X / in_img_x)));
int CI4 = UP_DIV(src_size.x, C4NUM);
int CI4_rem = src_size.x % C4NUM;
int in_img_x = CI4 * src_size.y;
FLT4 res = (FLT4)(0.0f);
FLT tmp[4];
FLT res_tmp[4];
int gcnt = 0;
int start = 0;
int i = 0;
int j = 0;
int n = 0;
int cond = (((int)(CO4_rem > 0)) << 1) | (CI4_rem > 0);
switch (cond) {
case 1:
start = ((X / CO4 * dst_size.z + min(dst_size.z, (X % CO4) * C4NUM)) + dst_size.w * Y);
gcnt = start / src_size.x * CI4 + (start % src_size.x) / C4NUM;
start = (CI4 > 1 && gcnt < CI4) ? 0 : ((X + Y * dst_size.x) * C4NUM) % src_size.x % C4NUM;
for (i = 0, n = 0, j = start; i < 4; ++n, j = 0) {
int X_src = (gcnt + n) % in_img_x;
res = READ_IMAGE(src_data, smp_zero, (int2)(X_src, (gcnt + n) / in_img_x));
tmp[0] = res.x;
tmp[1] = res.y;
tmp[2] = res.z;
tmp[3] = res.w;
int k = (X_src % CI4) == (CI4 - 1) ? CI4_rem : 4;
for (; j < k && i < 4; ++j, ++i) {
res_tmp[i] = tmp[j];
}
}
res.x = res_tmp[0];
res.y = res_tmp[1];
res.z = res_tmp[2];
res.w = res_tmp[3];
WRITE_IMAGE(dst_data, (int2)(X, Y), res);
break;
default:
gcnt = X + dst_size.x * Y;
res = READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x));
WRITE_IMAGE(dst_data, (int2)(X, Y), res);
}
}
__kernel void reshape_NC4HW4(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 size,
int4 size_out) {
__kernel void reshape_NC4HW4(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 src_size,
int4 dst_size) {
int X = get_global_id(0);
if (X >= size_out.x * size_out.y * size_out.z * size_out.w) {
int Y = get_global_id(1);
int CO4 = UP_DIV(dst_size.z, C4NUM);
int CO4_rem = dst_size.z % C4NUM;
if (X >= dst_size.x || Y > dst_size.y) {
return;
}
int in_img_x = size.z;
int out_img_x = size_out.z;
WRITE_IMAGE(dst_data, (int2)(X % out_img_x, X / out_img_x),
READ_IMAGE(src_data, smp_zero, (int2)(X % in_img_x, X / in_img_x)));
int CI4 = UP_DIV(src_size.x, C4NUM);
int CI4_rem = src_size.x % C4NUM;
int in_img_x = CI4 * src_size.y;
int gcnt = X + dst_size.x * Y;
WRITE_IMAGE(dst_data, (int2)(X, Y), READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x)));
}

@ -76,6 +76,7 @@ int ConcatOpenCLKernel::Init() {
auto param = reinterpret_cast<ConcatParameter *>(this->op_parameter_);
MS_LOG(DEBUG) << " concat at axis=: " << param->axis_;
param->axis_ = (param->axis_ == -1) ? (in_tensors_[0]->shape().size() - 1) : param->axis_;
if (param->axis_ < 0 || param->axis_ > 3) {
MS_LOG(ERROR) << " only support axis >= 0 and axis <= 3 ";
return RET_ERROR;

@ -38,12 +38,6 @@ int ReshapeOpenCLKernel::Init() {
MS_LOG(ERROR) << "Reshape output size should in 2,4";
return RET_ERROR;
}
if ((in_tensors_[0]->shape().back() % 4 != 0 || out_tensors_[0]->shape().back() % 4 != 0) &&
in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) {
MS_LOG(ERROR) << "Reshape input channel align 4 should equal output channel, cin:" << in_tensors_[0]->shape().back()
<< " cout:" << out_tensors_[0]->shape().back();
return RET_ERROR;
}
if (in_tensors_[0]->shape().size() == 2) {
inShape = {in_tensors_[0]->shape()[0], 1, 1, in_tensors_[0]->shape()[1]};
} else {
@ -81,6 +75,10 @@ int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
int h = outShape[1];
int w = outShape[2];
int c = outShape[3];
if (img_size_.size() == OpenCLImageSizeIndex::IDX_NUM) {
*img_size = img_size_;
return RET_OK;
}
if (op_format_ == schema::Format::Format_NHWC4) {
im_dst_x = w * UP_DIV(c, C4NUM);
im_dst_y = n * h;
@ -98,6 +96,7 @@ int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
img_size->clear();
std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
*img_size = vec;
img_size_ = vec;
return RET_OK;
}
@ -105,15 +104,15 @@ int ReshapeOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
std::vector<size_t> local = {};
std::vector<size_t> global = {
static_cast<size_t>(outShape[0] * outShape[1] * outShape[2] * UP_DIV(outShape[3], C4NUM))};
cl_int4 size = {inShape[0], inShape[1], inShape[2], UP_DIV(inShape[3], C4NUM)};
cl_int4 size_out = {outShape[0], outShape[1], outShape[2], UP_DIV(outShape[3], C4NUM)};
std::vector<size_t> global{img_size_[0], img_size_[1]};
cl_int4 src_size = {inShape[3], inShape[2], inShape[1], inShape[0]};
cl_int4 dst_size = {static_cast<cl_int>(img_size_[0]), static_cast<cl_int>(img_size_[1]), outShape[3],
outShape[3] * outShape[2]};
int arg_idx = 0;
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size_out);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}

@ -25,6 +25,7 @@
namespace mindspore::kernel {
enum class OpenCLMemType { BUF, IMG };
enum OpenCLImageSizeIndex { IDX_X = 0, IDX_Y, IDX_DTYPE, IDX_NUM };
struct OpenCLToFormatParameter {
OpParameter op_parameter;
@ -66,6 +67,7 @@ class OpenCLKernel : public LiteKernel {
schema::Format op_format_{schema::Format::Format_NHWC4};
lite::opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_;
lite::opencl::OpenCLRuntime *ocl_runtime_;
std::vector<size_t> img_size_;
};
} // namespace mindspore::kernel

@ -29,33 +29,23 @@ class TestReshapeOpenCL : public mindspore::CommonTest {
TestReshapeOpenCL() {}
};
void RunTestCaseReshape(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16,
bool is_output_2d) {
void RunTestCaseReshape(const std::vector<int> &shape_in, const std::vector<int> &shape_out, void *input_data,
void *output_data, bool enable_fp16) {
auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance();
ocl_runtime->Init();
size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float);
ocl_runtime->SetFp16Enable(enable_fp16);
auto allocator = ocl_runtime->GetAllocator();
int n = shape[0];
int h = shape[1];
int w = shape[2];
int c = shape[3];
int oh = shape[4];
int ow = shape[5];
std::vector<int> input_shape = {n, h, w, c};
auto tensor_x_ptr = std::make_unique<lite::Tensor>(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32),
input_shape, schema::Format_NHWC);
shape_in, schema::Format_NHWC);
auto tensor_x = tensor_x_ptr.get();
if (tensor_x == nullptr) {
MS_LOG(ERROR) << "tensor_x create error.";
return;
}
std::vector<int> out_shape = {n, oh, ow, c};
if (is_output_2d) {
out_shape = {n, h * w * c};
}
bool is_output_2d = shape_out.size() == 2;
auto tensor_out_ptr =
std::make_unique<lite::Tensor>(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), out_shape,
std::make_unique<lite::Tensor>(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), shape_out,
is_output_2d ? schema::Format_NC : schema::Format_NHWC);
auto tensor_out = tensor_out_ptr.get();
if (tensor_out == nullptr) {
@ -102,74 +92,108 @@ void RunTestCaseReshape(const std::vector<int> &shape, void *input_data, void *o
}
TEST_F(TestReshapeOpenCL, ReshapeFp32) {
int n = 1;
int h = 1;
int w = 1;
int c = 7;
int oh = 1;
int ow = 1;
std::vector<int> shape = {n, h, w, c, oh, ow};
std::vector<int> shape_in = {1, 1, 1, 7};
std::vector<int> shape_out = {1, 7};
std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
std::vector<float> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
RunTestCaseReshape(shape, input_data.data(), output_data.data(), false, true);
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false);
}
TEST_F(TestReshapeOpenCL, ReshapeFp16) {
int n = 1;
int h = 1;
int w = 1;
int c = 7;
int oh = 1;
int ow = 1;
std::vector<int> shape = {n, h, w, c, oh, ow};
std::vector<int> shape_in = {1, 1, 1, 7};
std::vector<int> shape_out = {1, 7};
std::vector<float16_t> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
std::vector<float16_t> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
RunTestCaseReshape(shape, input_data.data(), output_data.data(), true, true);
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), true);
}
TEST_F(TestReshapeOpenCL, Reshape4DFp32) {
int n = 1;
int h = 2;
int w = 2;
int c = 3;
int oh = 1;
int ow = 4;
std::vector<int> shape = {n, h, w, c, oh, ow};
std::vector<int> shape_in = {1, 2, 2, 3};
std::vector<int> shape_out = {1, 1, 4, 3};
std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f};
std::vector<float> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f};
RunTestCaseReshape(shape, input_data.data(), output_data.data(), false, false);
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false);
}
TEST_F(TestReshapeOpenCL, Reshape4DFp16) {
int n = 1;
int h = 2;
int w = 2;
int c = 3;
int oh = 1;
int ow = 4;
std::vector<int> shape = {n, h, w, c, oh, ow};
std::vector<int> shape_in = {1, 2, 2, 3};
std::vector<int> shape_out = {1, 1, 4, 3};
std::vector<float16_t> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f};
std::vector<float16_t> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f};
RunTestCaseReshape(shape, input_data.data(), output_data.data(), true, false);
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), true);
}
TEST_F(TestReshapeOpenCL, Reshape4D2DFp32) {
int n = 1;
int h = 2;
int w = 2;
int c = 4;
int oh = 2;
int ow = 2;
std::vector<int> shape = {n, h, w, c, oh, ow};
std::vector<int> shape_in = {1, 2, 2, 4};
std::vector<int> shape_out = {4, 4};
std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f};
std::vector<float> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f,
8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f};
RunTestCaseReshape(shape, input_data.data(), output_data.data(), false, true);
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false);
}
TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem10) {
std::vector<int> shape_in = {1, 3, 2, 4};
std::vector<int> shape_out = {1, 4, 2, 3};
std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f};
std::vector<float> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f};
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false);
}
TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem01Test0) {
std::vector<int> shape_in = {1, 4, 2, 3};
std::vector<int> shape_out = {1, 3, 2, 4};
std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f};
std::vector<float> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f};
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false);
}
TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem01Test1) {
std::vector<int> shape_in = {1, 2, 2, 5};
std::vector<int> shape_out = {1, 1, 5, 4};
std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f};
std::vector<float> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f};
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false);
}
TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem01Test2) {
std::vector<int> shape_in = {1, 4, 2, 5};
std::vector<int> shape_out = {1, 2, 5, 4};
std::vector<float> input_data = {
0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f,
14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f,
28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
};
std::vector<float> output_data = {
0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f,
14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f,
28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
};
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false);
}
TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem11) {
std::vector<int> shape_in = {1, 3, 2, 5};
std::vector<int> shape_out = {1, 5, 2, 3};
std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f};
std::vector<float> output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f};
RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false);
}
} // namespace mindspore

Loading…
Cancel
Save