parent
1091cdeedf
commit
cbf7f21bdd
@ -1,49 +0,0 @@
|
||||
// Element-wise sum of two float buffers: output[i] = input_a[i] + input_b[i].
//   input_a, input_b : operand buffers, read at the work-item's global id
//   output           : destination buffer, written at the same index
//   n                : number of elements to process; work-items whose
//                      global id (dimension 0) is not below n do nothing
__kernel void ArithmeticAdd(__global float *input_a,
                            __global float *input_b,
                            __global float *output,
                            const unsigned int n) {
  int gid = get_global_id(0);
  // Guard clause: surplus work-items exit before touching memory.
  if (!(gid < n)) {
    return;
  }
  const float lhs = input_a[gid];
  const float rhs = input_b[gid];
  output[gid] = lhs + rhs;
}
|
||||
// Element-wise difference of two float buffers:
// output[i] = input_a[i] - input_b[i].
//   input_a : minuend buffer
//   input_b : subtrahend buffer
//   output  : destination buffer, written at the work-item's global id
//   n       : number of elements to process; work-items whose global id
//             (dimension 0) is not below n do nothing
__kernel void ArithmeticSub(__global float *input_a,
                            __global float *input_b,
                            __global float *output,
                            const unsigned int n) {
  int gid = get_global_id(0);
  // Guard clause: surplus work-items exit before touching memory.
  if (!(gid < n)) {
    return;
  }
  const float lhs = input_a[gid];
  const float rhs = input_b[gid];
  output[gid] = lhs - rhs;
}
|
||||
// Element-wise product of two float buffers:
// output[i] = input_a[i] * input_b[i].
//   input_a, input_b : factor buffers, read at the work-item's global id
//   output           : destination buffer, written at the same index
//   n                : number of elements to process; work-items whose
//                      global id (dimension 0) is not below n do nothing
__kernel void ArithmeticMul(__global float *input_a,
                            __global float *input_b,
                            __global float *output,
                            const unsigned int n) {
  int gid = get_global_id(0);
  // Guard clause: surplus work-items exit before touching memory.
  if (!(gid < n)) {
    return;
  }
  const float lhs = input_a[gid];
  const float rhs = input_b[gid];
  output[gid] = lhs * rhs;
}
|
||||
// Element-wise quotient of two float buffers:
// output[i] = input_a[i] / input_b[i].
//   input_a : dividend buffer
//   input_b : divisor buffer (not checked for zero; the result is whatever
//             the device's float division produces for that input)
//   output  : destination buffer, written at the work-item's global id
//   n       : number of elements to process; work-items whose global id
//             (dimension 0) is not below n do nothing
__kernel void ArithmeticDiv(__global float *input_a,
                            __global float *input_b,
                            __global float *output,
                            const unsigned int n) {
  int id = get_global_id(0);
  if (id < n) {
    // Must be a division: using '*' here would make this kernel an exact
    // duplicate of ArithmeticMul.
    output[id] = input_a[id] / input_b[id];
  }
}
|
||||
|
||||
// Scale-and-shift over a buffer of float4 elements:
// output[i] = weight_vec * input[i] + bias_vec.
//   input  : source buffer of float4 elements
//   output : destination buffer of float4 elements
//   weight : scalar multiplier, placed in lane x of weight_vec
//   bias   : scalar offset, placed in lane x of bias_vec
//   n      : number of float4 elements to process; work-items whose global
//            id (dimension 0) is not below n do nothing
//
// NOTE(review): only lane x carries weight/bias; lanes y, z and w of both
// vectors are 0, so those output lanes are input * 0 + 0. If the scalar was
// meant to apply to all four lanes this needs changing - confirm with the
// caller before touching it.
__kernel void ArithmeticBiasAdd(__global float4 *input,
                                __global float4 *output,
                                const float weight,
                                const float bias,
                                const unsigned int n) {
  int gid = get_global_id(0);
  if (!(gid < n)) {
    return;
  }
  const float4 weight_vec = (float4)(weight, 0.0f, 0.0f, 0.0f);
  const float4 bias_vec = (float4)(bias, 0.0f, 0.0f, 0.0f);
  output[gid] = weight_vec * input[gid] + bias_vec;
}
|
@ -0,0 +1,51 @@
|
||||
// Element-wise sum of two float buffers: output[i] = input_a[i] + input_b[i].
//   input_a, input_b : operand buffers, read at the work-item's global id
//   output           : destination buffer, written at the same index
//   n                : number of elements to process; work-items whose
//                      global id (dimension 0) is >= n do nothing
__kernel void ElementAdd(__global float *input_a, __global float *input_b, __global float *output,
                         const unsigned int n) {
  int gid = get_global_id(0);
  if (gid < n) {
    const float lhs = input_a[gid];
    const float rhs = input_b[gid];
    output[gid] = lhs + rhs;
  }
}
|
||||
|
||||
// Element-wise difference of two float buffers:
// output[i] = input_a[i] - input_b[i].
//   input_a : minuend buffer
//   input_b : subtrahend buffer
//   output  : destination buffer, written at the work-item's global id
//   n       : number of elements to process; work-items whose global id
//             (dimension 0) is >= n do nothing
__kernel void ElementSub(__global float *input_a, __global float *input_b, __global float *output,
                         const unsigned int n) {
  int gid = get_global_id(0);
  if (gid < n) {
    const float lhs = input_a[gid];
    const float rhs = input_b[gid];
    output[gid] = lhs - rhs;
  }
}
|
||||
|
||||
// Element-wise product of two float buffers:
// output[i] = input_a[i] * input_b[i].
//   input_a, input_b : factor buffers, read at the work-item's global id
//   output           : destination buffer, written at the same index
//   n                : number of elements to process; work-items whose
//                      global id (dimension 0) is >= n do nothing
__kernel void ElementMul(__global float *input_a, __global float *input_b, __global float *output,
                         const unsigned int n) {
  int gid = get_global_id(0);
  if (gid < n) {
    const float lhs = input_a[gid];
    const float rhs = input_b[gid];
    output[gid] = lhs * rhs;
  }
}
|
||||
|
||||
// Element-wise quotient of two float buffers:
// output[i] = input_a[i] / input_b[i].
//   input_a : dividend buffer
//   input_b : divisor buffer (not checked for zero; the result is whatever
//             the device's float division produces for that input)
//   output  : destination buffer, written at the work-item's global id
//   n       : number of elements to process; work-items whose global id
//             (dimension 0) is >= n do nothing
__kernel void ElementDiv(__global float *input_a, __global float *input_b, __global float *output,
                         const unsigned int n) {
  int idx = get_global_id(0);
  if (idx >= n) return;
  // Must be a division: using '*' here would make this kernel an exact
  // duplicate of ElementMul.
  output[idx] = input_a[idx] / input_b[idx];
}
|
||||
|
||||
// Adds one scalar to every element of a float buffer:
// output[i] = input_a[i] + input_b.
//   input_a : source buffer, read at the work-item's global id
//   input_b : scalar operand shared by all work-items
//   output  : destination buffer, written at the same index
//   n       : number of elements to process; work-items whose global id
//             (dimension 0) is >= n do nothing
// NOTE(review): "Boardcast" is presumably a misspelling of "Broadcast"; the
// name is kept because it is the kernel's externally visible identifier.
__kernel void BoardcastAdd(__global float *input_a, float input_b, __global float *output, const unsigned int n) {
  int gid = get_global_id(0);
  if (gid < n) {
    const float elem = input_a[gid];
    output[gid] = elem + input_b;
  }
}
|
||||
|
||||
// Subtracts one scalar from every element of a float buffer:
// output[i] = input_a[i] - input_b.
//   input_a : source buffer (minuend), read at the work-item's global id
//   input_b : scalar subtrahend shared by all work-items
//   output  : destination buffer, written at the same index
//   n       : number of elements to process; work-items whose global id
//             (dimension 0) is >= n do nothing
// NOTE(review): "Boardcast" is presumably a misspelling of "Broadcast"; the
// name is kept because it is the kernel's externally visible identifier.
__kernel void BoardcastSub(__global float *input_a, float input_b, __global float *output, const unsigned int n) {
  int gid = get_global_id(0);
  if (gid < n) {
    const float elem = input_a[gid];
    output[gid] = elem - input_b;
  }
}
|
||||
|
||||
// Multiplies every element of a float buffer by one scalar:
// output[i] = input_a[i] * input_b.
//   input_a : source buffer, read at the work-item's global id
//   input_b : scalar factor shared by all work-items
//   output  : destination buffer, written at the same index
//   n       : number of elements to process; work-items whose global id
//             (dimension 0) is >= n do nothing
// NOTE(review): "Boardcast" is presumably a misspelling of "Broadcast"; the
// name is kept because it is the kernel's externally visible identifier.
__kernel void BoardcastMul(__global float *input_a, float input_b, __global float *output, const unsigned int n) {
  int gid = get_global_id(0);
  if (gid < n) {
    const float elem = input_a[gid];
    output[gid] = elem * input_b;
  }
}
|
||||
|
||||
// Divides every element of a float buffer by one scalar:
// output[i] = input_a[i] / input_b.
//   input_a : source buffer (dividend), read at the work-item's global id
//   input_b : scalar divisor shared by all work-items (not checked for
//             zero; the result is whatever the device's float division
//             produces for that input)
//   output  : destination buffer, written at the same index
//   n       : number of elements to process; work-items whose global id
//             (dimension 0) is >= n do nothing
// NOTE(review): "Boardcast" is presumably a misspelling of "Broadcast"; the
// name is kept because it is the kernel's externally visible identifier.
__kernel void BoardcastDiv(__global float *input_a, float input_b, __global float *output, const unsigned int n) {
  int idx = get_global_id(0);
  if (idx >= n) return;
  // Must be a division: using '*' here would make this kernel an exact
  // duplicate of BoardcastMul.
  output[idx] = input_a[idx] / input_b;
}
|
@ -0,0 +1,15 @@
|
||||
// Sampler for image reads: nearest-texel filtering, unnormalized (integer
// pixel) coordinates, and no address-mode handling, so callers must keep
// coordinates inside the image.
__constant sampler_t smp_none = CLK_FILTER_NEAREST | CLK_ADDRESS_NONE | CLK_NORMALIZED_COORDS_FALSE;
|
||||
|
||||
// Element-wise sum of two 2D images: each work-item reads one float4 texel
// from input_a and input_b at the same coordinate and writes their sum to
// output at that coordinate.
//   input_a, input_b : source images, sampled with smp_none
//   output           : destination image
//   output_shape     : bounds for the 3D global id; x bounds dimension 0,
//                      y bounds dimension 1, w bounds dimension 2
//                      (output_shape.z is not read by this kernel)
// Texel coordinate used: (X, Y * output_shape.w + Z).
//
// Image arguments are passed by value: OpenCL C does not allow pointers to
// image types. Host code still binds them as cl_mem via clSetKernelArg.
//
// NOTE(review): the write coordinate is assumed to match the read
// coordinate; confirm against the host-side image layout.
__kernel void ElementAdd(__read_only image2d_t input_a, __read_only image2d_t input_b, __write_only image2d_t output,
                         const int4 output_shape) {
  int X = get_global_id(0);
  int Y = get_global_id(1);
  int Z = get_global_id(2);
  if (X >= output_shape.x || Y >= output_shape.y || Z >= output_shape.w) return;

  // One coordinate shared by both reads and the write keeps them in step.
  const int2 coord = (int2)(X, Y * output_shape.w + Z);
  float4 a = read_imagef(input_a, smp_none, coord);
  float4 b = read_imagef(input_b, smp_none, coord);
  write_imagef(output, coord, a + b);
}
|
Loading…
Reference in new issue