!5071 GPU fix codex for SetDim and SetStride

Merge pull request !5071 from VectorSL/codex
pull/5071/MERGE
Committed by mindspore-ci-bot via Gitee · 5 years ago
commit de2ce9fde1

Summary: SetDimA and SetStrideA now take an explicit rank argument (len) and raise a ValueError when the incoming shape's rank does not match, instead of silently indexing past the end of the shape vector. Every cuDNN kernel call site (Conv2d forward, ConvGradFilter, ConvGradInput, Pooling forward, PoolingGrad) passes 4 for its 4-D descriptors, and the local nbDims counters become const.

@@ -80,7 +80,10 @@ class GpuKernel : public KernelMod {
     std::swap((*shape)[2], (*shape)[1]);
   }
-  void SetDimA(const std::vector<size_t> &shape, int *dimA, const std::string &format) {
+  void SetDimA(const std::vector<size_t> &shape, int *dimA, size_t len, const std::string &format) {
+    if (shape.size() != len) {
+      MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D.";
+    }
     if (format == "NCHW" || format == "DefaultFormat") {
       dimA[0] = SizeToInt(shape[0]);
       dimA[1] = SizeToInt(shape[1]);
@@ -95,7 +98,10 @@ class GpuKernel : public KernelMod {
       MS_LOG(ERROR) << "Unsupported data format " << format;
     }
   }
-  void SetStrideA(const std::vector<size_t> &shape, int *strideA, const std::string &format) {
+  void SetStrideA(const std::vector<size_t> &shape, int *strideA, size_t len, const std::string &format) {
+    if (shape.size() != len) {
+      MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D.";
+    }
     if (format == "NCHW" || format == "DefaultFormat") {
       strideA[0] = SizeToInt(shape[1] * shape[2] * shape[3]);
       strideA[1] = SizeToInt(shape[2] * shape[3]);
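
For readers outside the codebase: what follows is a minimal, standalone sketch of what the hardened helpers do. It substitutes std::invalid_argument for MindSpore's MS_EXCEPTION macro and a plain cast for SizeToInt, and its NHWC branches illustrate the usual cuDNN convention (dims reported in N,C,H,W order, layout carried by the strides); treat it as a reading aid, not the verbatim kernel code.

```cpp
#include <stdexcept>
#include <string>
#include <vector>

// Stand-ins for MindSpore's SizeToInt and MS_EXCEPTION (sketch only).
inline int ToInt(size_t v) { return static_cast<int>(v); }

inline void CheckRank(const std::vector<size_t> &shape, size_t len, const char *what) {
  if (shape.size() != len) {
    throw std::invalid_argument("Invalid size of input shape " + std::to_string(shape.size()) +
                                "-D with " + std::string(what) + " " + std::to_string(len) + "-D.");
  }
}

void SetDimA(const std::vector<size_t> &shape, int *dimA, size_t len, const std::string &format) {
  CheckRank(shape, len, "dimA");  // the new guard: fail fast instead of indexing out of bounds
  if (format == "NCHW" || format == "DefaultFormat") {
    for (size_t i = 0; i < len; ++i) dimA[i] = ToInt(shape[i]);
  } else if (format == "NHWC") {
    // cuDNN Nd descriptors take dims in canonical N,C,H,W order,
    // so an NHWC shape {N,H,W,C} is permuted on the way in.
    dimA[0] = ToInt(shape[0]);
    dimA[1] = ToInt(shape[3]);
    dimA[2] = ToInt(shape[1]);
    dimA[3] = ToInt(shape[2]);
  }
}

void SetStrideA(const std::vector<size_t> &shape, int *strideA, size_t len, const std::string &format) {
  CheckRank(shape, len, "strideA");
  if (format == "NCHW" || format == "DefaultFormat") {
    // Contiguous row-major strides for {N,C,H,W}, matching the diff above.
    strideA[0] = ToInt(shape[1] * shape[2] * shape[3]);
    strideA[1] = ToInt(shape[2] * shape[3]);
    strideA[2] = ToInt(shape[3]);
    strideA[3] = 1;
  } else if (format == "NHWC") {
    // Channel-innermost layout: stride 1 on the C dimension.
    strideA[0] = ToInt(shape[1] * shape[2] * shape[3]);
    strideA[1] = 1;
    strideA[2] = ToInt(shape[2] * shape[3]);
    strideA[3] = ToInt(shape[3]);
  }
}
```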

@@ -147,13 +147,13 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") {
       auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_),
                            IntToSize(old_width_ + pad_width_)};
-      SetDimA(padded_shape, dimA, data_format_);
-      SetStrideA(padded_shape, strideApadded, data_format_);
+      SetDimA(padded_shape, dimA, 4, data_format_);
+      SetStrideA(padded_shape, strideApadded, 4, data_format_);
     } else if (data_format_ == "NHWC") {
       auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_),
                            IntToSize(c_)};
-      SetDimA(padded_shape, dimA, data_format_);
-      SetStrideA(padded_shape, strideApadded, data_format_);
+      SetDimA(padded_shape, dimA, 4, data_format_);
+      SetStrideA(padded_shape, strideApadded, 4, data_format_);
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(padded_desc_, cudnn_data_type_, 4, dimA, strideApadded),
                                 "cudnnSetTensor4dDescriptor failed");
@@ -259,18 +259,18 @@ class Conv2dGpuFwdKernel : public GpuKernel {
   void Set4DDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape,
                  const std::vector<size_t> &output_shape) {
-    int nbDims = 4;
+    const int nbDims = 4;
     int dimA[4];
     int strideAin[4];
     int dimAout[4];
     int strideAout[4];
-    SetDimA(in_shape, dimA, data_format_);
-    SetStrideA(in_shape, strideAin, data_format_);
-    SetDimA(output_shape, dimAout, data_format_);
-    SetStrideA(output_shape, strideAout, data_format_);
+    SetDimA(in_shape, dimA, 4, data_format_);
+    SetStrideA(in_shape, strideAin, 4, data_format_);
+    SetDimA(output_shape, dimAout, 4, data_format_);
+    SetStrideA(output_shape, strideAout, 4, data_format_);
     int filterDimA[4];
     // OHWI for NHWC; OIHW for NCHW
-    SetDimA(filter_shape, filterDimA, data_format_);
+    SetDimA(filter_shape, filterDimA, 4, data_format_);
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(input_desc_, cudnn_data_type_, nbDims, dimA, strideAin),
                                 "cudnnSetTensor4dDescriptor failed");

@@ -148,13 +148,13 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") {
       auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_),
                            IntToSize(old_width_ + pad_width_)};
-      SetDimA(padded_shape, dimA, data_format_);
-      SetStrideA(padded_shape, strideApadded, data_format_);
+      SetDimA(padded_shape, dimA, 4, data_format_);
+      SetStrideA(padded_shape, strideApadded, 4, data_format_);
     } else if (data_format_ == "NHWC") {
       auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_),
                            IntToSize(c_)};
-      SetDimA(padded_shape, dimA, data_format_);
-      SetStrideA(padded_shape, strideApadded, data_format_);
+      SetDimA(padded_shape, dimA, 4, data_format_);
+      SetStrideA(padded_shape, strideApadded, 4, data_format_);
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
       cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded),
@@ -283,15 +283,15 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
   }
   void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &filter_shape,
                  const std::vector<size_t> &in_shape) {
-    int nbDims = 4;
+    const int nbDims = 4;
     int dimA[4];
     int strideAin[4];
     int dimAdy[4];
     int strideAdy[4];
-    SetDimA(in_shape, dimA, data_format_);
-    SetStrideA(in_shape, strideAin, data_format_);
-    SetDimA(dy_shape, dimAdy, data_format_);
-    SetStrideA(dy_shape, strideAdy, data_format_);
+    SetDimA(in_shape, dimA, 4, data_format_);
+    SetStrideA(in_shape, strideAin, 4, data_format_);
+    SetDimA(dy_shape, dimAdy, 4, data_format_);
+    SetStrideA(dy_shape, strideAdy, 4, data_format_);
     // filter shape always keep OIHW.
     int filterDimA[4] = {SizeToInt(filter_shape[0]), SizeToInt(filter_shape[1]), SizeToInt(filter_shape[2]),
                          SizeToInt(filter_shape[3])};
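
What the new guard buys in practice: before this patch, a mis-ranked shape reaching one of these 4-D paths would read past the end of the shape vector inside SetDimA/SetStrideA; now it surfaces as a ValueError immediately. A toy demonstration against the standalone sketch above (hypothetical, not MindSpore test code):

```cpp
#include <iostream>
// Assumes the sketched SetDimA (and its includes) from the snippet above is in scope.

int main() {
  std::vector<size_t> bad_shape = {32, 64};  // 2-D shape where a 4-D NCHW tensor is expected
  int dimA[4];
  try {
    SetDimA(bad_shape, dimA, 4, "NCHW");
  } catch (const std::invalid_argument &e) {
    std::cout << e.what() << '\n';  // Invalid size of input shape 2-D with dimA 4-D.
  }
  return 0;
}
```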

@@ -149,13 +149,13 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     if (data_format_ == "NCHW" || data_format_ == "DefaultFormat") {
       auto padded_shape = {IntToSize(n_), IntToSize(c_), IntToSize(old_height_ + pad_height_),
                            IntToSize(old_width_ + pad_width_)};
-      SetDimA(padded_shape, dimA, data_format_);
-      SetStrideA(padded_shape, strideApadded, data_format_);
+      SetDimA(padded_shape, dimA, 4, data_format_);
+      SetStrideA(padded_shape, strideApadded, 4, data_format_);
     } else if (data_format_ == "NHWC") {
       auto padded_shape = {IntToSize(n_), IntToSize(old_height_ + pad_height_), IntToSize(old_width_ + pad_width_),
                            IntToSize(c_)};
-      SetDimA(padded_shape, dimA, data_format_);
-      SetStrideA(padded_shape, strideApadded, data_format_);
+      SetDimA(padded_shape, dimA, 4, data_format_);
+      SetStrideA(padded_shape, strideApadded, 4, data_format_);
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
       cudnnSetTensorNdDescriptor(padded_descriptor_, cudnn_data_type_, 4, dimA, strideApadded),
@@ -284,17 +284,17 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
   }
   void Set4DDesc(const std::vector<size_t> &dy_shape, const std::vector<size_t> &input_shape,
                  const std::vector<size_t> &filter_shape) {
-    int nbDims = 4;
+    const int nbDims = 4;
     int dimA[4];
     int strideAin[4];
     int dimAdy[4];
     int strideAdy[4];
     int filterDimA[4];
-    SetDimA(input_shape, dimA, data_format_);
-    SetStrideA(input_shape, strideAin, data_format_);
-    SetDimA(dy_shape, dimAdy, data_format_);
-    SetStrideA(dy_shape, strideAdy, data_format_);
-    SetDimA(filter_shape, filterDimA, data_format_);
+    SetDimA(input_shape, dimA, 4, data_format_);
+    SetStrideA(input_shape, strideAin, 4, data_format_);
+    SetDimA(dy_shape, dimAdy, 4, data_format_);
+    SetStrideA(dy_shape, strideAdy, 4, data_format_);
+    SetDimA(filter_shape, filterDimA, 4, data_format_);
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(dy_desc_, cudnn_data_type_, nbDims, dimAdy, strideAdy),
                                 "cudnnSetTensorNdDescriptor failed");

@@ -88,15 +88,15 @@ class PoolingGpuFwdKernel : public GpuKernel {
       return true;
     }
     SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
-    int nbDims = 4;
+    const int nbDims = 4;
     int dimA[4];
     int strideAin[4];
     int dimAout[4];
     int strideAout[4];
-    SetDimA(input_shape, dimA, data_format_);
-    SetStrideA(input_shape, strideAin, data_format_);
-    SetDimA(output_shape, dimAout, data_format_);
-    SetStrideA(output_shape, strideAout, data_format_);
+    SetDimA(input_shape, dimA, 4, data_format_);
+    SetStrideA(input_shape, strideAin, 4, data_format_);
+    SetDimA(output_shape, dimAout, 4, data_format_);
+    SetStrideA(output_shape, strideAout, 4, data_format_);
     CHECK_CUDNN_RET_WITH_EXCEPT(
       cudnnSetTensorNdDescriptor(input_descriptor_, cudnn_data_type_, nbDims, dimA, strideAin),
       "cudnnSetTensor4dDescriptor failed");

@@ -100,7 +100,7 @@ class PoolingGradGpuKernel : public GpuKernel {
     int windowDimA[2] = {window_height, window_width};
     int paddingA[2] = {0, 0};
     int strideA[2] = {stride_[2], stride_[3]};
-    int nbDims = 4;
+    const int nbDims = 4;
     int dimA[4];
     int strideAin[4];
     int dimAy[4];
@@ -109,14 +109,14 @@ class PoolingGradGpuKernel : public GpuKernel {
     int strideAdy[4];
     int dimAout[4];
     int strideAout[4];
-    SetDimA(input_shape, dimA, data_format_);
-    SetStrideA(input_shape, strideAin, data_format_);
-    SetDimA(input_mask, dimAy, data_format_);
-    SetStrideA(input_mask, strideAiny, data_format_);
-    SetDimA(dout_shape, dimAdy, data_format_);
-    SetStrideA(dout_shape, strideAdy, data_format_);
-    SetDimA(output_shape, dimAout, data_format_);
-    SetStrideA(output_shape, strideAout, data_format_);
+    SetDimA(input_shape, dimA, 4, data_format_);
+    SetStrideA(input_shape, strideAin, 4, data_format_);
+    SetDimA(input_mask, dimAy, 4, data_format_);
+    SetStrideA(input_mask, strideAiny, 4, data_format_);
+    SetDimA(dout_shape, dimAdy, 4, data_format_);
+    SetStrideA(dout_shape, strideAdy, 4, data_format_);
+    SetDimA(output_shape, dimAout, 4, data_format_);
+    SetStrideA(output_shape, strideAout, 4, data_format_);
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(y_descriptor_, cudnn_data_type_, nbDims, dimAy, strideAiny),
                                 "cudnnSetTensor4dDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensorNdDescriptor(dy_descriptor_, cudnn_data_type_, nbDims, dimAdy, strideAdy),
