add tensor size limitation to 2G

pull/14682/head
VectorSL 4 years ago
parent 18dfbc9fdc
commit 7b9b84d651

@ -216,7 +216,7 @@ class ArrayReduceGpuKernel : public GpuKernel {
std::vector<size_t> inputA;
std::vector<size_t> outputC_shape = output_shape;
const int split_dim = 4;
CHECK_TENSOR_SIZE(input_shape);
if (input_shape.size() <= split_dim) {
ShapeNdTo4d(input_shape, &inputA);
CHECK_CUDNN_RET_WITH_EXCEPT(

@ -76,6 +76,7 @@ class ActivationGpuFwdKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(input_shape);
std::vector<size_t> shape;
double coef = (mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) ? 6.0 : 0.0;
if (mode_ == CUDNN_ACTIVATION_ELU) {
@ -85,7 +86,6 @@ class ActivationGpuFwdKernel : public GpuKernel {
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetActivationDescriptor(activation_desc_, mode_, CUDNN_NOT_PROPAGATE_NAN, coef),
"cudnnSetActivationDescriptor failed");
const int split_dim = 4;
if (input_shape.size() <= split_dim) {
ShapeNdTo4d(input_shape, &shape);

@ -84,6 +84,7 @@ class ActivationGradGpuKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(input_shape);
std::vector<size_t> shape;
double coef = (mode_ == CUDNN_ACTIVATION_CLIPPED_RELU) ? 5.999999 : 0.0;
if (mode_ == CUDNN_ACTIVATION_ELU) coef = 1.0;

@ -132,6 +132,7 @@ class BatchNormGpuKernel : public GpuKernel {
if (format_attr == kOpFormat_NHWC) {
format = kOpFormat_NHWC;
}
CHECK_TENSOR_SIZE(shape);
SetTensorDescriptor(format, shape);
InitSizeLists();
return true;
@ -254,7 +255,6 @@ class BatchNormGpuKernel : public GpuKernel {
width = SizeToInt(shape[3]);
cudnn_format = CUDNN_TENSOR_NCHW;
}
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_, cudnnSetTensor4dDescriptor(x_desc_, cudnn_format, cudnn_data_type_, batch, channel, height, width),
"Set x desc failed");

@ -155,6 +155,7 @@ class BatchNormGradGpuKernel : public GpuKernel {
format = kOpFormat_NHWC;
}
beta_data_diff_ = GetAttrWithDefault(kernel_node, "inplace_algo", std::string("cover")) == "cover" ? 0 : 1;
CHECK_TENSOR_SIZE(shape);
SetTensorDescriptor(format, shape);
InitSizeLists();
is_train_ = GetAttr<bool>(kernel_node, "is_training");

@ -99,6 +99,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(in_shape);
SetNCHW(in_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
if (data_format_ == kOpFormat_NHWC) {
compute_format_ = CUDNN_TENSOR_NHWC;

@ -118,6 +118,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(in_shape);
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
format_attr_ = GetAttr<std::string>(kernel_node, "format");
if (format_attr_ == kOpFormat_NHWC) {

@ -133,6 +133,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
ShapeNCHW2NHWC(&input_shape);
}
}
CHECK_TENSOR_SIZE(input_shape);
SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
Set4DDesc(dy_shape, input_shape, filter_shape);

@ -98,6 +98,7 @@ class Im2ColGpuFwdKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(in_shape);
Set4DDesc(in_shape, filter_shape, output_shape);
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetConvolutionGroupCount(conv_desc_, 1),
"cudnnSetConvGroupCount failed");

@ -134,6 +134,7 @@ class InstanceNormGpuKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(input_shape_);
SetTensorDescriptor();
InitSizeLists();
return true;

@ -131,6 +131,7 @@ class InstanceNormGradGpuKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(input_shape_);
beta_data_diff_ = GetAttrWithDefault(kernel_node, "inplace_algo", std::string("cover")) == "cover" ? 0 : 1;
SetTensorDescriptor();
InitSizeLists();

@ -110,6 +110,7 @@ class L2NormalizeGpuKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(inputA_shape);
if (inputA_shape.size() > MAX_DIMS) {
MS_LOG(EXCEPTION) << "Broadcast operation not support dim greater than 7";
}

@ -262,7 +262,8 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
std::vector<size_t> inputA;
std::vector<size_t> outputC_shape = output_shape;
constexpr int split_dim = 4;
CHECK_TENSOR_SIZE(input_shape);
CHECK_TENSOR_SIZE(output_shape);
if (input_shape.size() <= split_dim) {
ShapeNdTo4d(input_shape, &inputA);
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,

@ -93,6 +93,7 @@ class PoolingGpuFwdKernel : public GpuKernel {
InitSizeLists();
return true;
}
CHECK_TENSOR_SIZE(input_shape);
SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
const int nbDims = 4;
int dimA[4];

@ -96,6 +96,7 @@ class PoolingGradGpuKernel : public GpuKernel {
InitSizeLists();
return false;
}
CHECK_TENSOR_SIZE(input_shape);
SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format);
SetDimA(input_shape, dimA, 4, data_format);
SetStrideA(input_shape, strideAin, 4, data_format);

@ -136,6 +136,7 @@ class BatchNormFoldGpuKernel : public GpuKernel {
<< ", but BatchNormFold GpuKernel OP needs 4DTensor input.";
return false;
}
CHECK_TENSOR_SIZE(input_shape);
batch_ = input_shape[0];
channel_ = input_shape[1];
height_ = input_shape[2];

@ -189,7 +189,7 @@ namespace gpu {
#define VARIABLE_NOT_USED(var) \
{ (void)(var); }
inline bool CheckNullInput(std::vector<size_t> input_shape) {
inline bool CheckNullInput(const std::vector<size_t> &input_shape) {
// If input_shape.size() == 0, it means a scalar input; If input_shape.size() != 0 and input_shape contains 0,
// it means a null input. Just return a null output.
if (input_shape.size() != 0) {
@ -201,6 +201,19 @@ inline bool CheckNullInput(std::vector<size_t> input_shape) {
}
#define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape)
// cuDNN restricts a single tensor to fewer than 2G (2^31) elements; validate
// the element count of `shape` before handing it to a cuDNN descriptor.
// Throws a ValueError exception (via MS_EXCEPTION) when the product of the
// dimensions meets or exceeds the limit; otherwise does nothing. An empty
// shape is treated as a scalar (element count 1) and always passes.
inline void CheckTensorSize(const std::vector<size_t> &shape) {
  // 2^31 elements — the cuDNN per-tensor limit this check enforces.
  constexpr size_t kMaxTensorElements = static_cast<size_t>(1) << 31;  // 2147483648
  size_t total_size = 1;
  for (auto dim : shape) {
    total_size *= dim;
  }
  if (total_size >= kMaxTensorElements) {
    // NOTE: space before "elements" fixed so the count and unit don't run together.
    MS_EXCEPTION(ValueError) << "The total size of the tensor exceeds the max_limit of 2 Giga-elements, which is "
                             << total_size << " elements (" << shape << ").";
  }
}
#define CHECK_TENSOR_SIZE(shape) mindspore::device::gpu::CheckTensorSize(shape)
#define CHECK_CURAND_RET_WITH_EXCEPT(expression, message) \
{ \
curandStatus_t status = (expression); \

Loading…
Cancel
Save