!4138 change buffer to image2d for arithmetic
Merge pull request !4138 from liuchao/arith_image (pull/4138/MERGE)
commit
a6dc949054
@ -1,15 +1,65 @@
|
||||
// Sampler used by the image reads in the kernels below: coordinates are
// unnormalized (CLK_NORMALIZED_COORDS_FALSE), no out-of-range addressing mode
// is applied (CLK_ADDRESS_NONE), and the nearest texel is returned without
// interpolation (CLK_FILTER_NEAREST).
__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
|
||||
|
||||
// Element-wise addition of two images: output(X, Y) = input_a(X, Y) + input_b(X, Y).
//
// Each work-item handles one texel, addressed by global id 0 (X) and
// global id 1 (Y).
//
//   input_a, input_b  images read at the same (X, Y) coordinate
//   output            image receiving the per-texel float4 sum
//   output_shape      work-items with X >= output_shape.x or
//                     Y >= output_shape.y return without touching any image
__kernel void ElementAdd(__read_only image2d_t input_a, __read_only image2d_t input_b, __write_only image2d_t output,
                         const int2 output_shape) {
  int X = get_global_id(0);
  int Y = get_global_id(1);
  // The launch grid may be larger than the output; drop the surplus work-items.
  if (X >= output_shape.x || Y >= output_shape.y) {
    return;
  }

  float4 a = read_imagef(input_a, smp_none, (int2)(X, Y));
  float4 b = read_imagef(input_b, smp_none, (int2)(X, Y));
  // Write to this work-item's own coordinate rather than a fixed texel.
  write_imagef(output, (int2)(X, Y), a + b);
}
|
||||
|
||||
// Element-wise subtraction of two images: output(x, y) = input_a(x, y) - input_b(x, y).
//
// One work-item per texel; global id 0 selects the column and global id 1
// the row. Work-items whose coordinate is not strictly below output_shape
// in both components do nothing.
__kernel void ElementSub(__read_only image2d_t input_a, __read_only image2d_t input_b, __write_only image2d_t output,
                         const int2 output_shape) {
  const int col = get_global_id(0);
  const int row = get_global_id(1);

  if (col < output_shape.x && row < output_shape.y) {
    const int2 pos = (int2)(col, row);
    const float4 lhs = read_imagef(input_a, smp_none, pos);
    const float4 rhs = read_imagef(input_b, smp_none, pos);
    write_imagef(output, pos, lhs - rhs);
  }
}
|
||||
|
||||
// Element-wise multiplication of two images: output(x, y) = input_a(x, y) * input_b(x, y).
//
// One work-item per texel; global id 0 selects the column and global id 1
// the row. Work-items whose coordinate is not strictly below output_shape
// in both components do nothing.
__kernel void ElementMul(__read_only image2d_t input_a, __read_only image2d_t input_b, __write_only image2d_t output,
                         const int2 output_shape) {
  const int col = get_global_id(0);
  const int row = get_global_id(1);

  if (col < output_shape.x && row < output_shape.y) {
    const int2 pos = (int2)(col, row);
    const float4 lhs = read_imagef(input_a, smp_none, pos);
    const float4 rhs = read_imagef(input_b, smp_none, pos);
    write_imagef(output, pos, lhs * rhs);
  }
}
|
||||
|
||||
// Element-wise division of two images: output(x, y) = input_a(x, y) / input_b(x, y).
//
// One work-item per texel; global id 0 selects the column and global id 1
// the row. Work-items whose coordinate is not strictly below output_shape
// in both components do nothing. The division is applied per float4
// component with no check for zero components in input_b.
__kernel void ElementDiv(__read_only image2d_t input_a, __read_only image2d_t input_b, __write_only image2d_t output,
                         const int2 output_shape) {
  const int col = get_global_id(0);
  const int row = get_global_id(1);

  if (col < output_shape.x && row < output_shape.y) {
    const int2 pos = (int2)(col, row);
    const float4 numerator = read_imagef(input_a, smp_none, pos);
    const float4 denominator = read_imagef(input_b, smp_none, pos);
    write_imagef(output, pos, numerator / denominator);
  }
}
|
||||
|
||||
// Applies the same scalar scale and offset to every texel:
// output(x, y) = weight * input_a(x, y) + bias, with weight and bias applied
// to all four float components.
//
// One work-item per texel; global id 0 selects the column and global id 1
// the row. Work-items whose coordinate is not strictly below output_shape
// in both components do nothing.
__kernel void BoardcastArith(__read_only image2d_t input_a, float weight, float bias, __write_only image2d_t output,
                             const int2 output_shape) {
  const int col = get_global_id(0);
  const int row = get_global_id(1);

  if (col < output_shape.x && row < output_shape.y) {
    const int2 pos = (int2)(col, row);
    const float4 texel = read_imagef(input_a, smp_none, pos);
    write_imagef(output, pos, weight * texel + bias);
  }
}
|
||||
|
Loading…
Reference in new issue