fix conv2d and conv3d: (#20042)

1.support asymmetric padding;
    2.support padding algorithm:"SAME" and "VALID";
    3.support channel_last: data_format NHWC and NDHWC;
    4.change doc of python API and c++;

    test=develop, test=document_preview
fix-python-transpose
liym27 5 years ago committed by Guo Sheng
parent 02c6edc0d5
commit 3aa331d97e

@ -9,6 +9,7 @@ function(op_library TARGET)
set(miopen_hip_cc_srcs)
set(cu_cc_srcs)
set(cudnn_cu_cc_srcs)
set(cudnn_cu_srcs)
set(CUDNN_FILE)
set(mkldnn_cc_srcs)
set(MKLDNN_FILE)
@ -44,6 +45,9 @@ function(op_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu)
list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu)
endif()
if(WITH_AMD_GPU)
string(REPLACE "_op" "_miopen_op" MIOPEN_FILE "${TARGET}")
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cc)
@ -60,6 +64,8 @@ function(op_library TARGET)
foreach(src ${op_library_SRCS})
if (${src} MATCHES ".*\\.hip.cu$")
list(APPEND hip_cu_srcs ${src})
elseif(${src} MATCHES ".*_cudnn_op.cu$")
list(APPEND cudnn_cu_srcs ${src})
elseif (${src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${src})
elseif(${src} MATCHES ".*_cudnn_op.cu.cc$")
@ -97,7 +103,7 @@ function(op_library TARGET)
set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE)
endif()
if (WITH_GPU)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
elseif (WITH_AMD_GPU)
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cu_srcs} ${miopen_hip_cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
@ -160,6 +166,12 @@ function(op_library TARGET)
endif()
endif()
# pybind USE_OP_DEVICE_KERNEL for CUDNN
list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
if (WITH_GPU AND ${cudnn_cu_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif()
# pybind USE_OP_DEVICE_KERNEL for MIOPEN
if (WITH_AMD_GPU AND ${miopen_hip_cc_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MIOPEN);\n")

@ -140,8 +140,8 @@ paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=N
paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'bbb9e708bab250359864fefbdf48e9d9'))
paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types', 'seq_length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b02844e0ad4bd713c5fe6802aa13219c'))
paddle.fluid.layers.sequence_conv (ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'padding_start', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, True, None, None, None, None, None)), ('document', '2bf23e7884c380c3b27f2709aa322cb9'))
paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '06de9adb5994f6f8cb806c75b55550af'))
paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '71b09227709475fa178c1739dff64af6'))
paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None, 'NCHW')), ('document', 'b8da17862ba02b5297a37d2edd571d76'))
paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name', 'data_format'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None, 'NCDHW')), ('document', '73a15322d460ef9aa90d4d237b0bc5d5'))
paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test', 'pad_value'], varargs=None, keywords=None, defaults=(False, 0.0)), ('document', 'e90a93251c52dc4e6fb34fb3991b3f82'))
paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'eaa9d0bbd3d4e017c8bc4ecdac483711'))
paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name', 'axis'], varargs=None, keywords=None, defaults=(False, None, -1)), ('document', 'cee673c79e3ff4582656a24e04f841e5'))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -33,15 +33,18 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
const framework::Tensor& im, const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* col) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col->dims().size() == 5);
PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col->dims().size(), 5,
"The dimension of col should be 5.");
if (stride[0] == 1 && stride[1] == 1 && dilation[0] == 1 &&
dilation[1] == 1) {
if (padding[0] == 0 && padding[1] == 0) {
if (padding[0] == 0 && padding[1] == 0 && padding[2] == 0 &&
padding[3] == 0) {
im2col_sh1sw1dh1dw1ph0pw0<T>(im, col);
return;
} else if (padding[0] == 1 && padding[1] == 1) {
} else if (padding[0] == 1 && padding[1] == 1 && padding[2] == 1 &&
padding[3] == 1) {
im2col_sh1sw1dh1dw1ph1pw1<T>(im, col);
return;
}
@ -65,8 +68,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* im) {
PADDLE_ENFORCE(im->dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col.dims().size(), 5,
"The dimension of col should be 5.");
int im_channels = im->dims()[0];
int im_height = im->dims()[1];
int im_width = im->dims()[2];
@ -136,8 +140,9 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
const framework::Tensor& im, const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* col) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col->dims().size() == 5);
PADDLE_ENFORCE_EQ(im.dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col->dims().size(), 5,
"The dimension of col should be 5.");
int im_channels = im.dims()[0];
int im_height = im.dims()[1];
int im_width = im.dims()[2];
@ -198,8 +203,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
const std::vector<int>& dilation,
const std::vector<int>& stride,
const std::vector<int>& padding, framework::Tensor* im) {
PADDLE_ENFORCE(im->dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
PADDLE_ENFORCE_EQ(im->dims().size(), 3, "The dimension of im should be 3.");
PADDLE_ENFORCE_EQ(col.dims().size(), 5,
"The dimension of col should be 5.");
int im_channels = im->dims()[0];
int im_height = im->dims()[1];
int im_width = im->dims()[2];

@ -34,9 +34,10 @@ class Vol2ColFunctor<platform::CPUDeviceContext, T> {
const std::vector<int>& strides,
const std::vector<int>& paddings,
framework::Tensor* col) const {
PADDLE_ENFORCE(vol.dims().size() == 4);
PADDLE_ENFORCE(col->dims().size() == 7);
PADDLE_ENFORCE_EQ(vol.dims().size(), 4,
"The dimension of vol should be 4.");
PADDLE_ENFORCE_EQ(col->dims().size(), 7,
"The dimension of col should be 7.");
int input_channels = vol.dims()[0];
int input_depth = vol.dims()[1];
int input_height = vol.dims()[2];
@ -50,28 +51,35 @@ class Vol2ColFunctor<platform::CPUDeviceContext, T> {
int channels_col =
input_channels * filter_depth * filter_height * filter_width;
PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
// changed
bool paddings_size_is_6 = (paddings.size() == 6);
int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0];
int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0];
int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1];
int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1];
int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2];
int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2];
PADDLE_ENFORCE_EQ((input_depth + pad_d_forth + pad_d_back -
((dilations[0] * (filter_depth - 1) + 1))) /
strides[0] +
1,
output_depth,
"input_depth and output_depth are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
PADDLE_ENFORCE_EQ((input_height + pad_h_up + pad_h_down -
((dilations[1] * (filter_height - 1) + 1))) /
strides[1] +
1,
output_height,
"input_height and output_height are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
PADDLE_ENFORCE_EQ((input_width + pad_w_left + pad_w_right -
((dilations[2] * (filter_width - 1) + 1))) /
strides[2] +
1,
output_width,
"input_width and output_width are "
"mismatching.");
const T* vol_data = vol.data<T>();
T* col_data = col->data<T>();
@ -81,11 +89,11 @@ class Vol2ColFunctor<platform::CPUDeviceContext, T> {
int d_offset = (c / filter_width / filter_height) % filter_depth;
int c_in = c / filter_width / filter_height / filter_depth;
for (int d = 0; d < output_depth; ++d) {
int d_pad = d * strides[0] - paddings[0] + d_offset * dilations[0];
int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0];
for (int h = 0; h < output_height; ++h) {
int h_pad = h * strides[1] - paddings[1] + h_offset * dilations[1];
int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1];
for (int w = 0; w < output_width; ++w) {
int w_pad = w * strides[2] - paddings[2] + w_offset * dilations[2];
int w_pad = w * strides[2] - pad_w_left + w_offset * dilations[2];
int col_idx =
((c * output_depth + d) * output_height + h) * output_width + w;
@ -120,9 +128,10 @@ class Col2VolFunctor<platform::CPUDeviceContext, T> {
const std::vector<int>& strides,
const std::vector<int>& paddings,
framework::Tensor* vol) const {
PADDLE_ENFORCE(vol->dims().size() == 4);
PADDLE_ENFORCE(col.dims().size() == 7);
PADDLE_ENFORCE_EQ(vol->dims().size(), 4,
"The dimension of vol should be 4.");
PADDLE_ENFORCE_EQ(col.dims().size(), 7,
"The dimension of col should be 7.");
int input_channels = vol->dims()[0];
int input_depth = vol->dims()[1];
int input_height = vol->dims()[2];
@ -136,21 +145,29 @@ class Col2VolFunctor<platform::CPUDeviceContext, T> {
int channels_col =
input_channels * filter_depth * filter_height * filter_width;
PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
bool paddings_size_is_6 = (paddings.size() == 6);
int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0];
int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0];
int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1];
int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1];
int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2];
int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2];
PADDLE_ENFORCE_EQ((input_depth + pad_d_forth + pad_d_back -
((dilations[0] * (filter_depth - 1) + 1))) /
strides[0] +
1,
output_depth,
"input_depth and output_depth are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
PADDLE_ENFORCE_EQ((input_height + pad_h_up + pad_h_down -
((dilations[1] * (filter_height - 1) + 1))) /
strides[1] +
1,
output_height,
"input_height and output_height are "
"mismatching.");
PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
PADDLE_ENFORCE_EQ((input_width + pad_w_left + pad_w_right -
((dilations[2] * (filter_width - 1) + 1))) /
strides[2] +
1,
@ -166,11 +183,11 @@ class Col2VolFunctor<platform::CPUDeviceContext, T> {
int d_offset = (c / filter_width / filter_height) % filter_depth;
int cIm = c / filter_width / filter_height / filter_depth;
for (int d = 0; d < output_depth; ++d) {
int d_pad = d * strides[0] - paddings[0] + d_offset * dilations[0];
int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0];
for (int h = 0; h < output_height; ++h) {
int h_pad = h * strides[1] - paddings[1] + h_offset * dilations[1];
int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1];
for (int w = 0; w < output_width; ++w) {
int w_pad = w * strides[2] - paddings[2] + w_offset * dilations[2];
int w_pad = w * strides[2] - pad_w_left + w_offset * dilations[2];
if (h_pad >= 0 && h_pad < input_height && w_pad >= 0 &&
w_pad < input_width && d_pad >= 0 && d_pad < input_depth) {

@ -92,27 +92,34 @@ class Vol2ColFunctor<platform::CUDADeviceContext, T> {
int output_height = col->dims()[5];
int output_width = col->dims()[6];
PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
bool paddings_size_is_6 = (paddings.size() == 6);
int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0];
int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0];
int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1];
int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1];
int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2];
int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2];
PADDLE_ENFORCE_EQ((input_depth + pad_d_forth + pad_d_back -
((dilations[0] * (filter_depth - 1) + 1))) /
strides[0] +
1,
output_depth,
"input_depth and output_depth are "
"Mismatching.");
PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
"mismatching.");
PADDLE_ENFORCE_EQ((input_height + pad_h_up + pad_h_down -
((dilations[1] * (filter_height - 1) + 1))) /
strides[1] +
1,
output_height,
"input_height and output_height are "
"Mismatching.");
PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
"mismatching.");
PADDLE_ENFORCE_EQ((input_width + pad_w_left + pad_w_right -
((dilations[2] * (filter_width - 1) + 1))) /
strides[2] +
1,
output_width,
"input_width and output_width are "
"Mismatching.");
"mismatching.");
int num_outputs =
input_channels * output_depth * output_height * output_width;
@ -122,9 +129,8 @@ class Vol2ColFunctor<platform::CUDADeviceContext, T> {
vol2col<T><<<blocks, threads, 0, context.stream()>>>(
num_outputs, vol.data<T>(), input_depth, input_height, input_width,
dilations[0], dilations[1], dilations[2], filter_depth, filter_height,
filter_width, strides[0], strides[1], strides[2], paddings[0],
paddings[1], paddings[2], output_depth, output_height, output_width,
col->data<T>());
filter_width, strides[0], strides[1], strides[2], pad_d_forth, pad_h_up,
pad_w_left, output_depth, output_height, output_width, col->data<T>());
}
};
@ -218,27 +224,35 @@ class Col2VolFunctor<platform::CUDADeviceContext, T> {
int output_height = col.dims()[5];
int output_width = col.dims()[6];
PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
bool paddings_size_is_6 = (paddings.size() == 6);
int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0];
int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0];
int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1];
int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1];
int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2];
int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2];
PADDLE_ENFORCE_EQ((input_depth + pad_d_forth + pad_d_back -
((dilations[0] * (filter_depth - 1) + 1))) /
strides[0] +
1,
output_depth,
"input_depth and output_depth are "
"Mismatching.");
PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
"mismatching.");
PADDLE_ENFORCE_EQ((input_height + pad_h_up + pad_h_down -
((dilations[1] * (filter_height - 1) + 1))) /
strides[1] +
1,
output_height,
"input_height and output_height are "
"Mismatching.");
PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
"mismatching.");
PADDLE_ENFORCE_EQ((input_width + pad_w_left + pad_w_right -
((dilations[2] * (filter_width - 1) + 1))) /
strides[2] +
1,
output_width,
"input_width and output_width are "
"Mismatching.");
"mismatching.");
int num_kernels = input_channels * input_depth * input_height * input_width;
@ -248,9 +262,8 @@ class Col2VolFunctor<platform::CUDADeviceContext, T> {
col2vol<T><<<blocks, threads, 0, context.stream()>>>(
num_kernels, col.data<T>(), input_depth, input_height, input_width,
dilations[0], dilations[1], dilations[2], filter_depth, filter_height,
filter_width, strides[0], strides[1], strides[2], paddings[0],
paddings[1], paddings[2], output_depth, output_height, output_width,
vol->data<T>());
filter_width, strides[0], strides[1], strides[2], pad_d_forth, pad_h_up,
pad_w_left, output_depth, output_height, output_width, vol->data<T>());
}
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save