@@ -76,7 +76,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
 
     const float alpha = 1;
     const float beta = 0;
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_) {
+    if (use_pad_) {
       T *padded_addr = GetDeviceAddress<T>(workspace, 1);
       if (data_format_ == kOpFormat_NHWC) {
         CalPadNHWC(padded_size_ / sizeof(T), input_addr, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
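This first hunk only changes the guard around the explicit zero-padding path: the input is copied into the second workspace buffer (CalPad / CalPadNHWC) whenever use_pad_ is set, instead of only for the SAME pad modes. The sketch below is a host-side illustration of what NHWC zero padding does; the function name, signature, and std::vector layout are illustrative assumptions, not the actual CUDA kernel interface.

#include <cstddef>
#include <vector>

// Host-side sketch of NHWC zero padding (the real CalPadNHWC is a CUDA kernel):
// copy an N x H x W x C tensor into a zero-filled buffer of shape
// N x (H + pad_top + pad_bottom) x (W + pad_left + pad_right) x C,
// offset by (pad_top, pad_left). Zero is the pad value.
template <typename T>
std::vector<T> PadNHWC(const std::vector<T> &in, int n, int h, int w, int c,
                       int pad_top, int pad_bottom, int pad_left, int pad_right) {
  const int ph = h + pad_top + pad_bottom;
  const int pw = w + pad_left + pad_right;
  std::vector<T> out(static_cast<size_t>(n) * ph * pw * c, T(0));
  for (int b = 0; b < n; ++b) {
    for (int y = 0; y < h; ++y) {
      for (int x = 0; x < w; ++x) {
        for (int k = 0; k < c; ++k) {
          const size_t src = ((static_cast<size_t>(b) * h + y) * w + x) * c + k;
          const size_t dst = ((static_cast<size_t>(b) * ph + y + pad_top) * pw + x + pad_left) * c + k;
          out[dst] = in[src];
        }
      }
    }
  }
  return out;
}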
@@ -133,23 +133,18 @@ class Conv2dGpuFwdKernel : public GpuKernel {
                          [](const int64_t &value) { return static_cast<int>(value); });
     pad_height_ = pad_list[0];
     pad_width_ = pad_list[2];
-    auto symmetry_pad = (pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]);
+    use_pad_ = !((pad_height_ == pad_list[1]) && (pad_width_ == pad_list[3]));
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
     SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t input_descriptor_real = nullptr;
     int padA[2];
     int strideA[2] = {stride_[2], stride_[3]};
     int dilaA[2] = {dilation_[2], dilation_[3]};
-    if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase || !symmetry_pad) {
+    if (use_pad_) {
       pad_height_ = pad_list[0] + pad_list[1];
       pad_width_ = pad_list[2] + pad_list[3];
       pad_top_ = pad_list[0];
       pad_left_ = pad_list[2];
-
-      // if use_pad_ == true, using zero padding in advance, else using the default cudnn pad.
-      if (pad_height_ % 2 == 0 && pad_width_ % 2 == 0) {
-        use_pad_ = false;
-      }
       int dimA[4];
       int strideApadded[4];
       if (data_format_ == kOpFormat_NCHW || data_format_ == kOpFormat_DEFAULT) {
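The second hunk replaces the symmetry_pad variable and the later "even total pad" special case with a single rule: use_pad_ is true exactly when the requested padding is asymmetric (top != bottom or left != right), which is the one case cuDNN's single per-dimension pad value cannot express. A minimal sketch of that decision, assuming pad_list is ordered {top, bottom, left, right} as the [0]..[3] indices above suggest (PadDecision and DecidePad are illustrative names, not part of the kernel):

#include <array>
#include <cstdio>

struct PadDecision {
  bool use_pad;    // true -> zero-pad the input up front, pass pad = 0 to cuDNN
  int pad_height;  // height padding carried forward (the total when pre-padding)
  int pad_width;
  int pad_top;
  int pad_left;
};

// Mirrors the new logic in this hunk: pre-pad only when padding is asymmetric.
PadDecision DecidePad(const std::array<int, 4> &pad_list) {  // {top, bottom, left, right}
  PadDecision d{};
  d.pad_top = pad_list[0];
  d.pad_left = pad_list[2];
  d.use_pad = !(pad_list[0] == pad_list[1] && pad_list[2] == pad_list[3]);
  if (d.use_pad) {
    d.pad_height = pad_list[0] + pad_list[1];  // total pad, applied explicitly
    d.pad_width = pad_list[2] + pad_list[3];
  } else {
    d.pad_height = pad_list[0];  // symmetric pad, handed to cuDNN as-is
    d.pad_width = pad_list[2];
  }
  return d;
}

int main() {
  auto sym = DecidePad({1, 1, 1, 1});   // symmetric: let cuDNN pad
  auto asym = DecidePad({0, 1, 0, 1});  // asymmetric (typical of SAME with even input): pre-pad
  std::printf("symmetric use_pad=%d, asymmetric use_pad=%d\n", sym.use_pad, asym.use_pad);
  return 0;
}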
@@ -165,18 +160,12 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       }
       CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(padded_desc_, cudnn_data_type_, 4, dimA, strideApadded),
                                   "cudnnSetTensor4dDescriptor failed");
-
-      if (use_pad_) {
-        padA[0] = 0;
-        padA[1] = 0;
-      } else {
-        padA[0] = pad_top_;
-        padA[1] = pad_left_;
-      }
+      padA[0] = 0;
+      padA[1] = 0;
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolutionNdDescriptor(conv_desc_, 2, padA, strideA, dilaA, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolutionNdDescriptor failed");
-      input_descriptor_real = use_pad_ ? padded_desc_ : input_desc_;
+      input_descriptor_real = padded_desc_;
     } else {
       if (pad_mode_ == kValidPadModeUpperCase || pad_mode_ == kValidPadModeLowerCase) {
         pad_height_ = 0;
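With use_pad_ now implying asymmetric padding, the pre-padded branch always hands cuDNN a zero pad and always reads from padded_desc_, so the if/else on padA and the ternary on input_descriptor_real collapse. The output geometry is unchanged either way: per the cuDNN documentation, each spatial output extent is 1 + (in + 2*pad - ((filter - 1)*dilation + 1)) / stride, so zero-padding the input up front and passing pad = 0 gives the same extent as letting cuDNN pad symmetrically. A small check of that identity (ConvOutDim is an illustrative helper, not a cuDNN call):

#include <cassert>
#include <cstdio>

// Output extent of one spatial dimension of a forward convolution
// (formula from the cudnnGetConvolution2dForwardOutputDim documentation).
int ConvOutDim(int in, int filter, int pad, int stride, int dilation) {
  return 1 + (in + 2 * pad - ((filter - 1) * dilation + 1)) / stride;
}

int main() {
  // 32-wide input, 3-wide filter, stride 1, dilation 1, total pad 2:
  int via_cudnn_pad = ConvOutDim(32, 3, /*pad=*/1, 1, 1);    // cuDNN pads 1 on each side
  int via_pre_pad = ConvOutDim(32 + 2, 3, /*pad=*/0, 1, 1);  // input zero-padded in advance
  assert(via_cudnn_pad == via_pre_pad);
  std::printf("both paths give %d output elements\n", via_cudnn_pad);
  return 0;
}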
@@ -232,7 +221,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     input_size_list_.push_back(input_size_);
     input_size_list_.push_back(filter_size_);
     output_size_list_.push_back(output_size_);
-    if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
+    if (use_pad_ && !is_null_input_) {
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle_, padded_desc_, filter_desc_, conv_desc_, output_desc_,
                                                 conv_algorithm_, &workspace_size_),
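The last hunk applies the same simplification to the workspace query: the forward workspace size is taken against padded_desc_ under the simpler use_pad_ && !is_null_input_ condition, dropping the pad_mode check. As a rough illustration of the check-and-throw pattern used around these cuDNN calls only (this is not MindSpore's actual CHECK_CUDNN_RET_WITH_EXCEPT macro, which also records kernel context):

#include <cudnn.h>

#include <sstream>
#include <stdexcept>

// Simplified stand-in for CHECK_CUDNN_RET_WITH_EXCEPT: evaluate a cuDNN call and,
// on failure, throw with the supplied message plus cuDNN's error string.
// Usage shape mirrors the hunk above:
//   CHECK_CUDNN_SKETCH(cudnnGetConvolutionForwardWorkspaceSize(...), "query failed");
#define CHECK_CUDNN_SKETCH(expr, msg)                            \
  do {                                                           \
    cudnnStatus_t status_ = (expr);                              \
    if (status_ != CUDNN_STATUS_SUCCESS) {                       \
      std::ostringstream oss_;                                   \
      oss_ << (msg) << ": " << cudnnGetErrorString(status_);     \
      throw std::runtime_error(oss_.str());                      \
    }                                                            \
  } while (0)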