diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/square_sum_all_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/square_sum_all_impl.cu
new file mode 100644
index 0000000000..b8718db289
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/square_sum_all_impl.cu
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/square_sum_all_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
+
+template <typename T>
+__global__ void SquareSumAllKernel(const size_t size, const T* input_addr_0, const T* input_addr_1,
+                                   T* output_addr_0, T* output_addr_1) {
+  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) {
+    size_t split = size / 2;
+    if (i < split) {
+      T ret = input_addr_0[i] * input_addr_0[i];
+      MsAtomicAdd(output_addr_0, ret);
+    } else {
+      T ret = input_addr_1[i - split] * input_addr_1[i - split];
+      MsAtomicAdd(output_addr_1, ret);
+    }
+  }
+  return;
+}
+
+template <typename T>
+__global__ void InitOutput(const size_t size, T *output) {
+  T zero = 0;
+  for (size_t id = blockIdx.x * blockDim.x + threadIdx.x; id < size; id += blockDim.x * gridDim.x) {
+    output[id] = zero;
+  }
+  return;
+}
+
+template <typename T>
+void SquareSumAll(const size_t input_size_, const T* input_addr_0, const T* input_addr_1,
+                  T* output_addr_0, T* output_addr_1, cudaStream_t cuda_stream) {
+  InitOutput<<<GET_BLOCKS(1), GET_THREADS, 0, cuda_stream>>>(1, output_addr_0);
+  InitOutput<<<GET_BLOCKS(1), GET_THREADS, 0, cuda_stream>>>(1, output_addr_1);
+  size_t size = input_size_ * 2;
+  SquareSumAllKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, input_addr_0, input_addr_1,
+                                                                        output_addr_0, output_addr_1);
+}
+
+template void SquareSumAll<half>(const size_t input_size_, const half* input_addr_0, const half* input_addr_1,
+                                 half* output_addr_0, half* output_addr_1, cudaStream_t cuda_stream);
+template void SquareSumAll<float>(const size_t input_size_, const float* input_addr_0, const float* input_addr_1,
+                                  float* output_addr_0, float* output_addr_1, cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/square_sum_all_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/square_sum_all_impl.cuh
new file mode 100644
index 0000000000..6182786d89
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/square_sum_all_impl.cuh
@@ -0,0 +1,25 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SQUARE_SUM_ALL_IMPL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SQUARE_SUM_ALL_IMPL_H_
+
+#include "runtime/device/gpu/cuda_common.h"
+template <typename T>
+void SquareSumAll(const size_t input_size_, const T* input_addr_0, const T* input_addr_1,
+                  T* output_addr_0, T* output_addr_1, cudaStream_t cuda_stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_SQUARE_SUM_ALL_IMPL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.cc
new file mode 100644
index 0000000000..dd33c677ff
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.cc
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_ONE(SquareSumAll,
+                      KernelAttr()
+                        .AddAllSameAttr(true)
+                        .AddInputAttr(kNumberTypeFloat16)
+                        .AddInputAttr(kNumberTypeFloat16)
+                        .AddOutputAttr(kNumberTypeFloat16)
+                        .AddOutputAttr(kNumberTypeFloat16),
+                      SquareSumAllGpuFwdKernel, half)
+MS_REG_GPU_KERNEL_ONE(SquareSumAll,
+                      KernelAttr()
+                        .AddAllSameAttr(true)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32),
+                      SquareSumAllGpuFwdKernel, float)
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.h
new file mode 100644
index 0000000000..ef110e83b7
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/square_sum_all_gpu_kernel.h
@@ -0,0 +1,84 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SQUARE_SUM_ALL_GPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SQUARE_SUM_ALL_GPU_KERNEL_H_
+
+#include <memory>
+#include <vector>
+#include "backend/kernel_compiler/gpu/gpu_kernel.h"
+#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
+#include "backend/kernel_compiler/gpu/cuda_impl/square_sum_all_impl.cuh"
+
+namespace mindspore {
+namespace kernel {
+template <typename T>
+class SquareSumAllGpuFwdKernel : public GpuKernel {
+ public:
+  SquareSumAllGpuFwdKernel() : input_size_(1), is_null_input_(false) {}
+  ~SquareSumAllGpuFwdKernel() override {}
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (is_null_input_) {
+      return true;
+    }
+    T *input_addr_0 = GetDeviceAddress<T>(inputs, 0);
+    T *input_addr_1 = GetDeviceAddress<T>(inputs, 1);
+    T *output_addr_0 = GetDeviceAddress<T>(outputs, 0);
+    T *output_addr_1 = GetDeviceAddress<T>(outputs, 1);
+    SquareSumAll(input_size_, input_addr_0, input_addr_1, output_addr_0, output_addr_1,
+                 reinterpret_cast<cudaStream_t>(stream_ptr));
+
+    return true;
+  }
+  bool Init(const CNodePtr &kernel_node) override {
+    auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+    is_null_input_ = CHECK_NULL_INPUT(input_shape);
+    if (is_null_input_) {
+      MS_LOG(WARNING) << "SquareSumAllGpuFwdKernel input is null";
+    }
+    for (size_t i = 0; i < input_shape.size(); i++) {
+      input_size_ *= input_shape[i];
+    }
+    InitSizeLists();
+    return true;
+  }
+
+ protected:
+  void InitSizeLists() override {
+    input_size_list_.push_back(input_size_ * sizeof(T));
+    input_size_list_.push_back(input_size_ * sizeof(T));
+    output_size_list_.push_back(sizeof(T));
+    output_size_list_.push_back(sizeof(T));
+    workspace_size_list_.push_back(0);
+  }
+
+ private:
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;
+
+  size_t input_size_;
+  bool is_null_input_;
+};
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SQUARE_SUM_ALL_GPU_KERNEL_H_
diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py
index 726b67df6b..9d280dddaf 100644
--- a/mindspore/ops/operations/math_ops.py
+++ b/mindspore/ops/operations/math_ops.py
@@ -3688,8 +3688,8 @@ class SquareSumAll(PrimitiveWithInfer):
 
     def infer_dtype(self, x_type, y_type):
         valid_types = (mstype.float16, mstype.float32)
-        validator.check_tensor_dtype_valid('x1_type', x_type, valid_types, self.name)
-        validator.check_tensor_dtype_valid('x2_type', y_type, valid_types, self.name)
+        args = {"x1_type": x_type, "x2_type": y_type}
+        validator.check_tensors_dtypes_same_and_valid(args, valid_types, self.name)
         return x_type, y_type
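
A minimal usage sketch (not part of the patch above) of the SquareSumAll operator that the new GPU kernel backs, assuming a GPU build of MindSpore; the PyNative mode and device_target settings below are illustrative assumptions, not requirements of the kernel.

# Usage sketch for the SquareSumAll GPU kernel added by this patch.
# Assumptions: a GPU build of MindSpore is installed; the context settings are
# illustrative. Both inputs must share the same shape and dtype (float16 or
# float32), matching the kernel registrations above.
import numpy as np
from mindspore import Tensor, context
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")

square_sum_all = P.SquareSumAll()
x = Tensor(np.array([1.0, 2.0, 3.0], np.float32))
y = Tensor(np.array([4.0, 5.0, 6.0], np.float32))

# Each output is a scalar: the sum of squares of the corresponding input.
# out_x = 1 + 4 + 9 = 14.0, out_y = 16 + 25 + 36 = 77.0
out_x, out_y = square_sum_all(x, y)
print(out_x, out_y)

The CUDA implementation zero-initializes the two scalar outputs, then accumulates the squared elements of both inputs with MsAtomicAdd in a single grid-stride loop over 2 * input_size_ indices, using the midpoint split to route the first half of the indices to input 0 and the second half to input 1; this is why both inputs are expected to contain the same number of elements.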