|
|
|
@ -94,6 +94,7 @@ __global__ void KeBilinearInterpFw(
|
|
|
|
|
int nthreads = output_h * output_w;
|
|
|
|
|
int tid = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
|
int stride = blockDim.x * gridDim.x;
|
|
|
|
|
bool align_flag = (align_mode == 0 && !align_corners);
|
|
|
|
|
for (; tid < nthreads; tid += stride) {
|
|
|
|
|
int out_id_h = tid / output_w;
|
|
|
|
|
int out_id_w = tid % output_w;
|
|
|
|
@ -102,25 +103,23 @@ __global__ void KeBilinearInterpFw(
|
|
|
|
|
int channel_id = out_id_w / out_img_size;
|
|
|
|
|
|
|
|
|
|
int out_img_idy = (out_id_w % out_img_size) / out_img_w;
|
|
|
|
|
int in_img_idy = (align_mode == 0 && !align_corners)
|
|
|
|
|
int in_img_idy = align_flag
|
|
|
|
|
? static_cast<int>(ratio_h * (out_img_idy + 0.5) - 0.5)
|
|
|
|
|
: static_cast<int>(ratio_h * out_img_idy);
|
|
|
|
|
in_img_idy = (in_img_idy > 0) ? in_img_idy : 0;
|
|
|
|
|
int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0;
|
|
|
|
|
T h1lambda = (align_mode == 0 && !align_corners)
|
|
|
|
|
? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
|
|
|
|
|
: ratio_h * out_img_idy - in_img_idy;
|
|
|
|
|
T h1lambda = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
|
|
|
|
|
: ratio_h * out_img_idy - in_img_idy;
|
|
|
|
|
T h2lambda = 1.f - h1lambda;
|
|
|
|
|
|
|
|
|
|
int out_img_idx = tid % out_img_w;
|
|
|
|
|
int in_img_idx = (align_mode == 0 && !align_corners)
|
|
|
|
|
int in_img_idx = align_flag
|
|
|
|
|
? static_cast<int>(ratio_w * (out_img_idx + 0.5) - 0.5)
|
|
|
|
|
: static_cast<int>(ratio_w * out_img_idx);
|
|
|
|
|
in_img_idx = (in_img_idx > 0) ? in_img_idx : 0;
|
|
|
|
|
int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0;
|
|
|
|
|
T w1lambda = (align_mode == 0 && !align_corners)
|
|
|
|
|
? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
|
|
|
|
|
: ratio_w * out_img_idx - in_img_idx;
|
|
|
|
|
T w1lambda = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
|
|
|
|
|
: ratio_w * out_img_idx - in_img_idx;
|
|
|
|
|
T w2lambda = 1.f - w1lambda;
|
|
|
|
|
|
|
|
|
|
const T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
|
|
|
|
@ -144,6 +143,7 @@ __global__ void KeBilinearInterpBw(
|
|
|
|
|
int nthreads = output_h * output_w;
|
|
|
|
|
int tid = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
|
int stride = blockDim.x * gridDim.x;
|
|
|
|
|
bool align_flag = (align_mode == 0 && !align_corners);
|
|
|
|
|
for (; tid < nthreads; tid += stride) {
|
|
|
|
|
int out_id_h = tid / output_w;
|
|
|
|
|
int out_id_w = tid % output_w;
|
|
|
|
@ -152,26 +152,22 @@ __global__ void KeBilinearInterpBw(
|
|
|
|
|
int channel_id = out_id_w / out_img_size;
|
|
|
|
|
|
|
|
|
|
int out_img_idy = (out_id_w % out_img_size) / out_img_w;
|
|
|
|
|
int in_img_idy = (align_mode == 0 && !align_corners)
|
|
|
|
|
? ratio_h * (out_img_idy + 0.5) - 0.5
|
|
|
|
|
: ratio_h * out_img_idy;
|
|
|
|
|
int in_img_idy = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5
|
|
|
|
|
: ratio_h * out_img_idy;
|
|
|
|
|
in_img_idy = (in_img_idy > 0) ? in_img_idy : 0;
|
|
|
|
|
int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0;
|
|
|
|
|
T h1lambda = (align_mode == 0 && !align_corners)
|
|
|
|
|
? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
|
|
|
|
|
: ratio_h * out_img_idy - in_img_idy;
|
|
|
|
|
T h1lambda = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
|
|
|
|
|
: ratio_h * out_img_idy - in_img_idy;
|
|
|
|
|
|
|
|
|
|
T h2lambda = 1.f - h1lambda;
|
|
|
|
|
|
|
|
|
|
int out_img_idx = tid % out_img_w;
|
|
|
|
|
int in_img_idx = (align_mode == 0 && !align_corners)
|
|
|
|
|
? ratio_w * (out_img_idx + 0.5) - 0.5
|
|
|
|
|
: ratio_w * out_img_idx;
|
|
|
|
|
int in_img_idx = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5
|
|
|
|
|
: ratio_w * out_img_idx;
|
|
|
|
|
in_img_idx = (in_img_idx > 0) ? in_img_idx : 0;
|
|
|
|
|
int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0;
|
|
|
|
|
T w1lambda = (align_mode == 0 && !align_corners)
|
|
|
|
|
? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
|
|
|
|
|
: ratio_w * out_img_idx - in_img_idx;
|
|
|
|
|
T w1lambda = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
|
|
|
|
|
: ratio_w * out_img_idx - in_img_idx;
|
|
|
|
|
T w2lambda = 1.f - w1lambda;
|
|
|
|
|
|
|
|
|
|
T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
|
|
|
|
|