|
|
|
@ -1,15 +1,5 @@
|
|
|
|
|
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
|
|
|
|
// Sampler used by the READ_IMAGE calls in this file: unnormalized (pixel) coordinates,
// clamp addressing for out-of-range coordinates, nearest-neighbour filtering.
__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NCHW_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
                                          int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called READ_IMAGE on src_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
__kernel void to_format_NHWC_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
|
|
|
|
|
int4 shape) {
|
|
|
|
|
int X = get_global_id(0);
|
|
|
|
@ -47,58 +37,17 @@ __kernel void to_format_NHWC4_to_NHWC4_IMG(__global FLT4 *src_data, __write_only
|
|
|
|
|
}
|
|
|
|
|
WRITE_IMAGE(dst_data, (int2)(Y * size.z + Z, X), src_data[(X * size.y + Y) * size.z + Z]);
|
|
|
|
|
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NC4HW4_to_NHWC4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
                                            int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called READ_IMAGE on src_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NCHW_to_NC4HW4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
                                           int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called READ_IMAGE on src_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NHWC_to_NC4HW4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
                                           int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called READ_IMAGE on src_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NHWC4_to_NC4HW4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
                                            int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called READ_IMAGE on src_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Copies one FLT4 per work-item from the src_data buffer into the dst_data image.
//
//   src_data  linear buffer of FLT4, read at index (c4 * H + h) * W + w
//   dst_data  image written at pixel (x = w, y = c4 * H + h)
//   size      (h, w, c4, 1): extents of the three global-id dimensions
//   shape     (n, c, h, w): not read by this kernel
//
// X, Y and Z were previously declared twice in the same scope (a redefinition error),
// and a second WRITE_IMAGE used a w-major address, (Y * size.z + Z) * size.x + X, that
// disagrees with the c4-major ordering documented below. Only the c4-major write is kept.
__kernel void to_format_NC4HW4_to_NC4HW4_IMG(__global FLT4 *src_data, __write_only image2d_t dst_data, int4 size,
                                             int4 shape) {
  // size(h, w, c4, 1), shape(n, c, h, w)
  int X = get_global_id(0);  // h
  int Y = get_global_id(1);  // w
  int Z = get_global_id(2);  // c4
  if (X >= size.x || Y >= size.y || Z >= size.z) {
    return;
  }
  // Row index shared by the buffer and the image: one row per (c4, h) pair.
  int row = Z * size.x + X;
  WRITE_IMAGE(dst_data, (int2)(Y, row), src_data[row * size.y + Y]);
}
|
|
|
|
|
|
|
|
|
|
__kernel void to_format_NCHW_to_NCHW_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
|
|
|
|
|
int4 shape) {
|
|
|
|
|
int X = get_global_id(0);
|
|
|
|
@ -109,56 +58,6 @@ __kernel void to_format_NCHW_to_NCHW_BUF(__read_only image2d_t src_data, __globa
|
|
|
|
|
}
|
|
|
|
|
dst_data[(Z * size.y + Y) * size.x + X] = READ_IMAGE(src_data, smp_zero, (int2)(Y * size.x + X, Z));
|
|
|
|
|
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NHWC_to_NCHW_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
                                         int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called WRITE_IMAGE on dst_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NHWC4_to_NCHW_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
                                          int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called WRITE_IMAGE on dst_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NC4HW4_to_NCHW_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
                                           int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called WRITE_IMAGE on dst_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NCHW_to_NHWC_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
                                         int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called WRITE_IMAGE on dst_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
__kernel void to_format_NHWC_to_NHWC_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
                                         int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called WRITE_IMAGE on dst_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
__kernel void to_format_NHWC4_to_NHWC_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
|
|
|
|
|
int4 shape) {
|
|
|
|
|
int X = get_global_id(0);
|
|
|
|
@ -185,25 +84,16 @@ __kernel void to_format_NHWC4_to_NHWC_BUF(__read_only image2d_t src_data, __glob
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Placeholder kernel: performs the global-id range check against size.xyz and then
// stores nothing into dst_data. src_data and shape are not read.
// NOTE(review): the doubled "to_to" in the kernel name looks like a typo, but the name is
// what callers would look up, so it is left unchanged here.
__kernel void to_format_NC4HW4_to_to_NHWC_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
                                              int4 shape) {
  const int gx = get_global_id(0);
  const int gy = get_global_id(1);
  const int gz = get_global_id(2);
  const bool inside = gx < size.x && gy < size.y && gz < size.z;
  if (!inside) {
    return;
  }
  // Conversion not implemented. The earlier disabled draft called WRITE_IMAGE on dst_data,
  // which is a __global buffer in this kernel, so it could not be enabled as written.
}
|
|
|
|
|
// Copies one FLT4 per work-item from the src_data image into the dst_data buffer.
//
//   src_data  image read at pixel (x = w, y = c * H + h) through smp_zero
//   dst_data  linear buffer of FLT4, written at index (c * H + h) * W + w
//   size      (h, w, c, 1): extents of the three global-id dimensions
//   shape     (n, c, h, w): not read by this kernel
//
// X, Y and Z were previously declared twice in the same scope (a redefinition error),
// and a second store used a w-major index, (Y * size.z + Z) * size.x + X. That index can
// coincide with the slot another work-item fills through the c-major index, so the two
// stores could overwrite each other. Only the c-major store is kept.
__kernel void to_format_NC4HW4_to_NC4HW4_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
                                             int4 shape) {
  // size(h, w, c, 1), shape(n, c, h, w)
  int X = get_global_id(0);  // h
  int Y = get_global_id(1);  // w
  int Z = get_global_id(2);  // c
  if (X >= size.x || Y >= size.y || Z >= size.z) {
    return;
  }
  // Row index shared by the image and the buffer: one row per (c, h) pair.
  int row = Z * size.x + X;
  dst_data[row * size.y + Y] = READ_IMAGE(src_data, smp_zero, (int2)(Y, row));
}
|
|
|
|
|
__kernel void to_format_NHWC4_to_NHWC4_BUF(__read_only image2d_t src_data, __global FLT4 *dst_data, int4 size,
|
|
|
|
|
int4 shape) {
|
|
|
|
|