|
|
|
@ -199,7 +199,8 @@ __global__ void im2colOCF(const T* im_data, T* col_data, int input_channels,
|
|
|
|
|
int input_height, int input_width, int filter_height,
|
|
|
|
|
int filter_width, int stride_height, int stride_width,
|
|
|
|
|
int padding_height, int padding_width,
|
|
|
|
|
int output_height, int output_width) {
|
|
|
|
|
int output_height, int output_width, int row_begin,
|
|
|
|
|
int row_end) {
|
|
|
|
|
int swid = blockIdx.x;
|
|
|
|
|
int shid = blockIdx.y;
|
|
|
|
|
for (int channelid = threadIdx.z; channelid < input_channels;
|
|
|
|
@ -207,7 +208,8 @@ __global__ void im2colOCF(const T* im_data, T* col_data, int input_channels,
|
|
|
|
|
for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) {
|
|
|
|
|
for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) {
|
|
|
|
|
int width_offset = idx + swid * stride_width - padding_width;
|
|
|
|
|
int height_offset = idy + shid * stride_height - padding_height;
|
|
|
|
|
int height_offset =
|
|
|
|
|
idy + (shid + row_begin) * stride_height - padding_height;
|
|
|
|
|
int im_offset = width_offset + height_offset * input_width +
|
|
|
|
|
channelid * input_height * input_width;
|
|
|
|
|
|
|
|
|
@ -238,8 +240,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
public:
|
|
|
|
|
void operator()(const platform::DeviceContext& context,
|
|
|
|
|
const framework::Tensor& im, framework::Tensor& col,
|
|
|
|
|
int stride_height, int stride_width, int padding_height,
|
|
|
|
|
int padding_width) {
|
|
|
|
|
int stride_height, int stride_width, int up_pad,
|
|
|
|
|
int down_pad) {
|
|
|
|
|
PADDLE_ENFORCE(im.dims().size() == 3);
|
|
|
|
|
PADDLE_ENFORCE(col.dims().size() == 5);
|
|
|
|
|
int input_channels = im.dims()[0];
|
|
|
|
@ -247,7 +249,20 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
int input_width = im.dims()[2];
|
|
|
|
|
int filter_height = col.dims()[3];
|
|
|
|
|
int filter_width = col.dims()[4];
|
|
|
|
|
int output_height = col.dims()[0];
|
|
|
|
|
|
|
|
|
|
int row_begin, row_end;
|
|
|
|
|
int padding_height = std::max(up_pad, down_pad);
|
|
|
|
|
int padding_width = 0;
|
|
|
|
|
if (up_pad >= down_pad) {
|
|
|
|
|
row_begin = 0;
|
|
|
|
|
} else {
|
|
|
|
|
row_begin = down_pad - up_pad;
|
|
|
|
|
}
|
|
|
|
|
row_end = row_begin + ((input_height + up_pad + down_pad - filter_height) /
|
|
|
|
|
stride_height +
|
|
|
|
|
1);
|
|
|
|
|
|
|
|
|
|
int output_height = row_end - row_begin; // col.dims()[0];
|
|
|
|
|
int output_width = col.dims()[1];
|
|
|
|
|
|
|
|
|
|
int block_dim_x = 0;
|
|
|
|
@ -275,7 +290,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
.stream()>>>(
|
|
|
|
|
im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
|
|
|
|
|
filter_height, filter_width, stride_height, stride_width,
|
|
|
|
|
padding_height, padding_width, output_height, output_width);
|
|
|
|
|
padding_height, padding_width, output_height, output_width, row_begin,
|
|
|
|
|
row_end);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
@ -284,7 +300,8 @@ __global__ void col2imOCF(T* im_data, const T* col_data, int input_channels,
|
|
|
|
|
int input_height, int input_width, int filter_height,
|
|
|
|
|
int filter_width, int stride_height, int stride_width,
|
|
|
|
|
int padding_height, int padding_width,
|
|
|
|
|
int output_height, int output_width) {
|
|
|
|
|
int output_height, int output_width, int row_begin,
|
|
|
|
|
int row_end) {
|
|
|
|
|
int swid = blockIdx.x;
|
|
|
|
|
int shid = blockIdx.y;
|
|
|
|
|
for (int channelid = threadIdx.z; channelid < input_channels;
|
|
|
|
@ -292,7 +309,8 @@ __global__ void col2imOCF(T* im_data, const T* col_data, int input_channels,
|
|
|
|
|
for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) {
|
|
|
|
|
for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) {
|
|
|
|
|
int width_offset = idx + swid * stride_width - padding_width;
|
|
|
|
|
int height_offset = idy + shid * stride_height - padding_height;
|
|
|
|
|
int height_offset =
|
|
|
|
|
idy + (shid + row_begin) * stride_height - padding_height;
|
|
|
|
|
int im_offset = width_offset + height_offset * input_width +
|
|
|
|
|
channelid * input_height * input_width;
|
|
|
|
|
|
|
|
|
@ -322,7 +340,7 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
public:
|
|
|
|
|
void operator()(const platform::DeviceContext& context, framework::Tensor& im,
|
|
|
|
|
const framework::Tensor& col, int stride_height,
|
|
|
|
|
int stride_width, int padding_height, int padding_width) {
|
|
|
|
|
int stride_width, int up_pad, int down_pad) {
|
|
|
|
|
PADDLE_ENFORCE(im.dims().size() == 3);
|
|
|
|
|
PADDLE_ENFORCE(col.dims().size() == 5);
|
|
|
|
|
int input_channels = im.dims()[0];
|
|
|
|
@ -330,7 +348,20 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
int input_width = im.dims()[2];
|
|
|
|
|
int filter_height = col.dims()[3];
|
|
|
|
|
int filter_width = col.dims()[4];
|
|
|
|
|
int output_height = col.dims()[0];
|
|
|
|
|
|
|
|
|
|
int row_begin, row_end;
|
|
|
|
|
int padding_height = std::max(up_pad, down_pad);
|
|
|
|
|
int padding_width = 0;
|
|
|
|
|
if (up_pad >= down_pad) {
|
|
|
|
|
row_begin = 0;
|
|
|
|
|
} else {
|
|
|
|
|
row_begin = down_pad - up_pad;
|
|
|
|
|
}
|
|
|
|
|
row_end = row_begin + ((input_height + up_pad + down_pad - filter_height) /
|
|
|
|
|
stride_height +
|
|
|
|
|
1);
|
|
|
|
|
|
|
|
|
|
int output_height = row_end - row_begin; // col.dims()[0];
|
|
|
|
|
int output_width = col.dims()[1];
|
|
|
|
|
|
|
|
|
|
int block_dim_x = 0;
|
|
|
|
@ -358,7 +389,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
.stream()>>>(
|
|
|
|
|
im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
|
|
|
|
|
filter_height, filter_width, stride_height, stride_width,
|
|
|
|
|
padding_height, padding_width, output_height, output_width);
|
|
|
|
|
padding_height, padding_width, output_height, output_width, row_begin,
|
|
|
|
|
row_end);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|