|
|
|
@ -2,8 +2,8 @@
|
|
|
|
|
// Short alias for the int2 vector type, used for 2-D image coordinates in this file.
#define INT2 int2
|
|
|
|
|
// Short alias for the int4 vector type, used for the shape/begin kernel arguments in this file.
#define INT4 int4
|
|
|
|
|
// Image sampler shared by the kernels: unnormalized coordinates, no addressing mode
// (CLK_ADDRESS_NONE), nearest-texel filtering.
__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
|
|
|
|
|
__kernel void slice(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape,
|
|
|
|
|
INT4 begin, INT2 sharedNoUpdiv) {
|
|
|
|
|
__kernel void slice_NHWC4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape,
|
|
|
|
|
INT4 begin, INT2 sharedNoUpdiv) {
|
|
|
|
|
int X = get_global_id(1); // H
|
|
|
|
|
int Y = get_global_id(2); // W
|
|
|
|
|
if (X >= out_shape.y || Y >= out_shape.z) {
|
|
|
|
@ -16,7 +16,7 @@ __kernel void slice(__read_only image2d_t input, __write_only image2d_t output,
|
|
|
|
|
WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i, (X)), result);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
int begin_postion = sharedNoUpdiv.y % 4;
|
|
|
|
|
int begin_postion = sharedNoUpdiv.x % 4;
|
|
|
|
|
FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + begin.w, (X + begin.y)));
|
|
|
|
|
if (begin_postion == 1) {
|
|
|
|
|
for (int i = 1; i <= out_shape.w; i++) {
|
|
|
|
@ -76,3 +76,71 @@ __kernel void slice(__read_only image2d_t input, __write_only image2d_t output,
|
|
|
|
|
WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Slice kernel for the layout named NC4HW4 (presumably channel-slice-major rows; confirm with the host code).
//
// Each work-item is identified by (get_global_id(1), get_global_id(2)) and is discarded when it
// falls outside out_shape.y / out_shape.z. A surviving work-item copies out_shape.w FLT4 texels
// from `input` (offset by `begin`) to `output`.
//
//   sharedNoUpdiv.x % 4  lane shift of the source data. 0 means texels are copied verbatim;
//                        otherwise every output texel is stitched from the tail lanes of one
//                        input texel and the head lanes of the next one.
//   sharedNoUpdiv.y % 4  number of lanes kept in the last output texel; when non-zero the last
//                        texel is rewritten with the remaining lanes set to 0.
__kernel void slice_NC4HW4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape,
                           INT4 begin, INT2 sharedNoUpdiv) {
  const int X = get_global_id(1);  // H
  const int Y = get_global_id(2);  // W
  if (X >= out_shape.y || Y >= out_shape.z) {
    return;
  }

  // Input coordinates that do not depend on the slice index.
  const int src_col = Y + begin.z;
  const int src_row = X + begin.y;
  const int lane_shift = sharedNoUpdiv.x % 4;

  FLT4 result;
  if (lane_shift == 0) {
    // Aligned case: one input texel maps to exactly one output texel.
    for (int slice_idx = 0; slice_idx < out_shape.w; ++slice_idx) {
      result = READ_IMAGE(input, smp_none, (INT2)(src_col, (slice_idx + begin.w) * input_shape.y + src_row));
      WRITE_IMAGE(output, (INT2)(Y, slice_idx * out_shape.y + X), result);
    }
  } else {
    // Unaligned case: keep the previous input texel and combine it with the next one.
    FLT4 prev = READ_IMAGE(input, smp_none, (INT2)(src_col, begin.w * input_shape.y + src_row));
    for (int slice_idx = 1; slice_idx <= out_shape.w; ++slice_idx) {
      const FLT4 next = READ_IMAGE(input, smp_none, (INT2)(src_col, (slice_idx + begin.w) * input_shape.y + src_row));
      switch (lane_shift) {
        case 1:
          result = (FLT4)(prev.y, prev.z, prev.w, next.x);
          break;
        case 2:
          result = (FLT4)(prev.z, prev.w, next.x, next.y);
          break;
        default:
          result = (FLT4)(prev.w, next.x, next.y, next.z);
          break;
      }
      WRITE_IMAGE(output, (INT2)(Y, (slice_idx - 1) * out_shape.y + X), result);
      // Only the lanes consumed above are ever read again, so carrying the whole texel is enough.
      prev = next;
    }
  }

  // Mask the last output texel down to the lanes that hold real data.
  const int tail_lanes = sharedNoUpdiv.y % 4;
  if (tail_lanes > 0) {
    FLT4 padded = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);
    padded.x = result.x;
    if (tail_lanes >= 2) {
      padded.y = result.y;
    }
    if (tail_lanes == 3) {
      padded.z = result.z;
    }
    WRITE_IMAGE(output, (INT2)(Y, (out_shape.w - 1) * out_shape.y + X), padded);
  }
}
|
|
|
|
|