|
|
|
@ -66,8 +66,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
|
|
|
|
|
public:
|
|
|
|
|
void operator()(const platform::DeviceContext& context,
|
|
|
|
|
const framework::Tensor& im, framework::Tensor& col,
|
|
|
|
|
int stride_height, int stride_width, int padding_height,
|
|
|
|
|
int padding_width) {
|
|
|
|
|
int stride_height, int stride_width, int padding_up,
|
|
|
|
|
int padding_down, int padding_left, int padding_right) {
|
|
|
|
|
PADDLE_ENFORCE(im.dims().size() == 3);
|
|
|
|
|
PADDLE_ENFORCE(col.dims().size() == 5);
|
|
|
|
|
|
|
|
|
@ -79,6 +79,15 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
|
|
|
|
|
int output_height = col.dims()[3];
|
|
|
|
|
int output_width = col.dims()[4];
|
|
|
|
|
|
|
|
|
|
PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) /
|
|
|
|
|
stride_height +
|
|
|
|
|
1 ==
|
|
|
|
|
output_height);
|
|
|
|
|
PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) /
|
|
|
|
|
stride_width +
|
|
|
|
|
1 ==
|
|
|
|
|
output_width);
|
|
|
|
|
|
|
|
|
|
int num_outputs = input_channels * output_height * output_width;
|
|
|
|
|
int blocks = (num_outputs + 1024 - 1) / 1024;
|
|
|
|
|
int block_x = 512;
|
|
|
|
@ -89,8 +98,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
|
|
|
|
|
reinterpret_cast<const platform::CUDADeviceContext&>(context)
|
|
|
|
|
.stream()>>>(
|
|
|
|
|
im.data<T>(), num_outputs, input_height, input_width, filter_height,
|
|
|
|
|
filter_width, stride_height, stride_width, padding_height,
|
|
|
|
|
padding_width, output_height, output_width, col.data<T>());
|
|
|
|
|
filter_width, stride_height, stride_width, padding_up, padding_left,
|
|
|
|
|
output_height, output_width, col.data<T>());
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
@ -152,7 +161,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
|
|
|
|
|
public:
|
|
|
|
|
void operator()(const platform::DeviceContext& context, framework::Tensor& im,
|
|
|
|
|
const framework::Tensor& col, int stride_height,
|
|
|
|
|
int stride_width, int padding_height, int padding_width) {
|
|
|
|
|
int stride_width, int padding_up, int padding_down,
|
|
|
|
|
int padding_left, int padding_right) {
|
|
|
|
|
PADDLE_ENFORCE(im.dims().size() == 3);
|
|
|
|
|
PADDLE_ENFORCE(col.dims().size() == 5);
|
|
|
|
|
|
|
|
|
@ -164,8 +174,18 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
|
|
|
|
|
int output_height = col.dims()[3];
|
|
|
|
|
int output_width = col.dims()[4];
|
|
|
|
|
|
|
|
|
|
size_t num_kernels = input_channels * (input_height + 2 * padding_height) *
|
|
|
|
|
(input_width + 2 * padding_width);
|
|
|
|
|
PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) /
|
|
|
|
|
stride_height +
|
|
|
|
|
1 ==
|
|
|
|
|
output_height);
|
|
|
|
|
PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) /
|
|
|
|
|
stride_width +
|
|
|
|
|
1 ==
|
|
|
|
|
output_width);
|
|
|
|
|
|
|
|
|
|
size_t num_kernels = input_channels *
|
|
|
|
|
(input_height + padding_up + padding_down) *
|
|
|
|
|
(input_width + padding_left + padding_right);
|
|
|
|
|
|
|
|
|
|
size_t blocks = (num_kernels + 1024 - 1) / 1024;
|
|
|
|
|
size_t block_x = 512;
|
|
|
|
@ -178,10 +198,10 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
|
|
|
|
|
col2im<T><<<grid, threads, 0,
|
|
|
|
|
reinterpret_cast<const platform::CUDADeviceContext&>(context)
|
|
|
|
|
.stream()>>>(
|
|
|
|
|
num_kernels, col.data<T>(), input_height + 2 * padding_height,
|
|
|
|
|
input_width + 2 * padding_width, input_channels, filter_height,
|
|
|
|
|
filter_width, stride_height, stride_width, padding_height,
|
|
|
|
|
padding_width, output_height, output_width, im.data<T>());
|
|
|
|
|
num_kernels, col.data<T>(), input_height + padding_up + padding_down,
|
|
|
|
|
// Padded width = input width + left AND right padding; must agree with the
// num_kernels computation and the output_width PADDLE_ENFORCE above.
input_width + padding_left + padding_right, input_channels,
|
|
|
|
|
filter_height, filter_width, stride_height, stride_width, padding_up,
|
|
|
|
|
padding_left, output_height, output_width, im.data<T>());
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
@ -238,8 +258,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
public:
|
|
|
|
|
void operator()(const platform::DeviceContext& context,
|
|
|
|
|
const framework::Tensor& im, framework::Tensor& col,
|
|
|
|
|
int stride_height, int stride_width, int padding_height,
|
|
|
|
|
int padding_width) {
|
|
|
|
|
int stride_height, int stride_width, int padding_up,
|
|
|
|
|
int padding_down, int padding_left, int padding_right) {
|
|
|
|
|
PADDLE_ENFORCE(im.dims().size() == 3);
|
|
|
|
|
PADDLE_ENFORCE(col.dims().size() == 5);
|
|
|
|
|
int input_channels = im.dims()[0];
|
|
|
|
@ -250,6 +270,15 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
int output_height = col.dims()[0];
|
|
|
|
|
int output_width = col.dims()[1];
|
|
|
|
|
|
|
|
|
|
PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) /
|
|
|
|
|
stride_height +
|
|
|
|
|
1 ==
|
|
|
|
|
output_height);
|
|
|
|
|
PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) /
|
|
|
|
|
stride_width +
|
|
|
|
|
1 ==
|
|
|
|
|
output_width);
|
|
|
|
|
|
|
|
|
|
int block_dim_x = 0;
|
|
|
|
|
int block_dim_y = 0;
|
|
|
|
|
if (filter_height <= 4 && filter_width <= 4) {
|
|
|
|
@ -274,8 +303,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
reinterpret_cast<const platform::CUDADeviceContext&>(context)
|
|
|
|
|
.stream()>>>(
|
|
|
|
|
im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
|
|
|
|
|
filter_height, filter_width, stride_height, stride_width,
|
|
|
|
|
padding_height, padding_width, output_height, output_width);
|
|
|
|
|
filter_height, filter_width, stride_height, stride_width, padding_up,
|
|
|
|
|
padding_left, output_height, output_width);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
@ -322,7 +351,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
public:
|
|
|
|
|
void operator()(const platform::DeviceContext& context, framework::Tensor& im,
|
|
|
|
|
const framework::Tensor& col, int stride_height,
|
|
|
|
|
int stride_width, int padding_height, int padding_width) {
|
|
|
|
|
int stride_width, int padding_up, int padding_down,
|
|
|
|
|
int padding_left, int padding_right) {
|
|
|
|
|
PADDLE_ENFORCE(im.dims().size() == 3);
|
|
|
|
|
PADDLE_ENFORCE(col.dims().size() == 5);
|
|
|
|
|
int input_channels = im.dims()[0];
|
|
|
|
@ -333,6 +363,15 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
int output_height = col.dims()[0];
|
|
|
|
|
int output_width = col.dims()[1];
|
|
|
|
|
|
|
|
|
|
PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) /
|
|
|
|
|
stride_height +
|
|
|
|
|
1 ==
|
|
|
|
|
output_height);
|
|
|
|
|
PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) /
|
|
|
|
|
stride_width +
|
|
|
|
|
1 ==
|
|
|
|
|
output_width);
|
|
|
|
|
|
|
|
|
|
int block_dim_x = 0;
|
|
|
|
|
int block_dim_y = 0;
|
|
|
|
|
if (filter_height <= 4 && filter_width <= 4) {
|
|
|
|
@ -357,8 +396,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
|
|
|
|
|
reinterpret_cast<const platform::CUDADeviceContext&>(context)
|
|
|
|
|
.stream()>>>(
|
|
|
|
|
im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
|
|
|
|
|
filter_height, filter_width, stride_height, stride_width,
|
|
|
|
|
padding_height, padding_width, output_height, output_width);
|
|
|
|
|
filter_height, filter_width, stride_height, stride_width, padding_up,
|
|
|
|
|
padding_left, output_height, output_width);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|