diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h
index bae315d1c1..3014477a48 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h
@@ -74,12 +74,12 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
     inputs_host_ = std::make_unique<T *[]>(input_num_);
     len_axis_ = std::make_unique<int[]>(input_num_);
     for (int i = 0; i < input_num_; i++) {
-      int input_size = 1;
+      size_t input_size = 1;
       auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
       for (size_t j = 0; j < input_shape.size(); j++) {
-        input_size *= SizeToInt(input_shape[j]);
+        input_size *= input_shape[j];
       }
-      input_size_list_.push_back(IntToSize(input_size * sizeof(T)));
+      input_size_list_.push_back(input_size * sizeof(T));
       len_axis_[i] = SizeToInt(input_shape[axis_]);
     }
     workspace_size_list_.push_back(sizeof(T *) * input_num_);
@@ -97,7 +97,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
         all_size_before_axis_ *= output_shape[i];
       }
     }
-    output_size_list_.push_back(IntToSize(output_size_ * sizeof(T)));
+    output_size_list_.push_back(output_size_ * sizeof(T));
     InitSizeLists();
 
     return true;
@@ -117,7 +117,7 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
   }
   int axis_;
   int input_num_;
-  int output_size_;
+  size_t output_size_;
   int all_size_before_axis_;
   int all_size_axis_;
   std::unique_ptr<T *[]> inputs_host_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h
index b26c01ee10..b1593675ba 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/split_gpu_kernel.h
@@ -83,7 +83,7 @@ class SplitGpuFwdKernel : public GpuKernel {
         all_size_before_axis_ *= input_shape[i];
       }
     }
-    input_size_list_.push_back(IntToSize(input_size_ * sizeof(T)));
+    input_size_list_.push_back(input_size_ * sizeof(T));
     axis_step_ = input_shape[axis_] / output_num_;
 
     for (int i = 0; i < output_num_; i++) {
@@ -138,7 +138,7 @@ class SplitGpuFwdKernel : public GpuKernel {
   }
   int axis_;
   int output_num_;
-  int input_size_;
+  size_t input_size_;
   int axis_step_;
   int all_size_before_axis_;
   int all_size_axis_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu
index c3a77d186d..4866d61dd9 100755
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cu
@@ -19,7 +19,7 @@
 #include <stdio.h>
 #include "backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh"
 template <typename T>
-__global__ void Concat(const int size, const int input_num,
+__global__ void Concat(const size_t size, const int input_num,
                        const int all_size_before_axis, const int all_size_axis,
                        int* len_axis, T** inputs, T* output) {
   for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) {
@@ -45,7 +45,7 @@ __global__ void Concat(const int size, const int input_num,
 }
 
 template <typename T>
-void ConcatKernel(const int size, const int input_num,
+void ConcatKernel(const size_t size, const int input_num,
                   const int all_size_before_axis, const int all_size_axis,
                   int* len_axis, T** inputs, T* output,
                   cudaStream_t cuda_stream) {
@@ -55,15 +55,15 @@ void ConcatKernel(const int size, const int input_num,
   return;
 }
 
-template void ConcatKernel(const int size, const int input_num,
+template void ConcatKernel(const size_t size, const int input_num,
                            const int all_size_before_axis, const int all_size_axis,
                            int* len_axis, float** inputs, float* output,
                            cudaStream_t cuda_stream);
-template void ConcatKernel(const int size, const int input_num,
+template void ConcatKernel(const size_t size, const int input_num,
                            const int all_size_before_axis, const int all_size_axis,
                            int* len_axis, int** inputs, int* output,
                            cudaStream_t cuda_stream);
-template void ConcatKernel(const int size, const int input_num,
+template void ConcatKernel(const size_t size, const int input_num,
                            const int all_size_before_axis, const int all_size_axis,
                            int* len_axis, half** inputs, half* output,
                            cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh
index 010e2977e2..6e469e8028 100755
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/concatv2_impl.cuh
@@ -19,7 +19,7 @@
 #include "runtime/device/gpu/cuda_common.h"
 
 template <typename T>
-void ConcatKernel(const int size, const int input_num,
+void ConcatKernel(const size_t size, const int input_num,
                   const int all_size_before_axis, const int all_size_axis,
                   int* len_axis, T** inputs, T* output,
                   cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu
index a242290860..e892a3b47d 100755
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cu
@@ -19,7 +19,7 @@
 #include <stdio.h>
 #include "backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh"
 template <typename T>
-__global__ void Split(const int size, const int axis_step, const int all_size_before_axis,
+__global__ void Split(const size_t size, const int axis_step, const int all_size_before_axis,
                       const int all_size_axis, const T* input, T** outputs) {
   for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < size; pos += blockDim.x * gridDim.x) {
     int num = pos % all_size_before_axis / all_size_axis;
@@ -32,19 +32,19 @@ __global__ void Split(const int size, const int axis_step, const int all_size_be
 }
 
 template <typename T>
-void SplitKernel(const int size, const int axis_step, const int all_size_before_axis,
+void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis,
                  const int all_size_axis, const T* input, T** outputs, cudaStream_t cuda_stream) {
   Split<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, axis_step, all_size_before_axis,
                                                            all_size_axis, input, outputs);
   return;
 }
 
-template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis,
+template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis,
                           const int all_size_axis, const float* input, float** outputs,
                           cudaStream_t cuda_stream);
-template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis,
+template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis,
                           const int all_size_axis, const int* input, int** outputs,
                           cudaStream_t cuda_stream);
-template void SplitKernel(const int size, const int axis_step, const int all_size_before_axis,
+template void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis,
                           const int all_size_axis, const half* input, half** outputs,
                           cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh
index 5306648da8..b8abce290d 100755
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/split_impl.cuh
@@ -19,6 +19,6 @@
 #include "runtime/device/gpu/cuda_common.h"
 
 template <typename T>
-void SplitKernel(const int size, const int axis_step, const int all_size_before_axis,
+void SplitKernel(const size_t size, const int axis_step, const int all_size_before_axis,
                  const int all_size_axis, const T* input, T** outputs, cudaStream_t cuda_stream);
 #endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPLIT_H_
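
The diff carries no commit message, but the pattern is a standard 32-bit overflow fix: the flat element counts (`input_size`, `output_size_`, `input_size_`) were accumulated in `int`, which wraps once a tensor reaches 2^31 elements, and the wrapped count then drives the buffer sizes pushed into `input_size_list_` and `output_size_list_`. The host-side sketch below illustrates the failure mode; the shape is hypothetical (not taken from the PR), and an explicit truncation to `int` stands in for the original's undefined signed overflow.

    #include <cstdio>
    #include <cstddef>
    #include <vector>

    int main() {
      // Hypothetical tensor shape with 2 * 1024 * 1024 * 1024 = 2^31 elements,
      // one more than INT_MAX can represent.
      const std::vector<size_t> shape = {2, 1024, 1024, 1024};

      long long narrow = 1;  // stands in for the old `int input_size`; kept wider here only
                             // so the wrapped value can be printed (real `int` arithmetic
                             // overflow would be undefined behavior)
      size_t wide = 1;       // the patched `size_t input_size`
      for (size_t d : shape) {
        // Truncate to int each step, as the old accumulator effectively did.
        narrow = static_cast<int>(narrow * static_cast<long long>(d));
        wide *= d;
      }

      std::printf("int accumulator    : %lld elements\n", narrow);  // -2147483648
      std::printf("size_t accumulator : %zu elements, %zu bytes as float\n",
                  wide, wide * sizeof(float));                      // 2147483648, 8589934592
      return 0;
    }

Note that the patch widens only the flat sizes; `all_size_before_axis_`, `all_size_axis_`, and `len_axis` stay `int` (as does the `int pos` loop counter in `Concat`), so the kernels still assume each per-axis slice fits in 32 bits.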