From 62ae93a16b5c7a6fc8c2205f3f8eec91998d5c09 Mon Sep 17 00:00:00 2001 From: wangrao Date: Thu, 21 Jan 2021 09:50:43 +0800 Subject: [PATCH] add type support for reduce operations --- .../kernel_compiler/cpu/reduce_cpu_kernel.cc | 41 ++++++++------- .../kernel_compiler/cpu/reduce_cpu_kernel.h | 50 +++++++++++++++---- 2 files changed, 62 insertions(+), 29 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc index f67922373b..55100037d7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,8 @@ const size_t kMaxDim = 100; static std::map reduce_types_map_ = { {"ReduceMax", 1}, {"ReduceMean", 2}, {"ReduceSum", 3}, {"ReduceMin", 4}}; -void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { +template +void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); @@ -59,18 +60,19 @@ void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { left_dims_ = left_dims_ / stride_; } -bool ReduceCPUKernel::Launch(const std::vector &inputs, - const std::vector & /*workspaces*/, - const std::vector &outputs) { - size_t out_float_size = left_dims_ * sizeof(float); - size_t in_float_size = stride_ * out_float_size; - if (inputs[0]->size != in_float_size || outputs[0]->size != out_float_size) { +template +bool ReduceCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspaces*/, + const std::vector &outputs) { + size_t out_size = left_dims_ * sizeof(T); + size_t in_size = stride_ * out_size; + if (inputs[0]->size != in_size || outputs[0]->size != out_size) { MS_LOG(EXCEPTION) << "invalid input or output data size!"; } - auto input = reinterpret_cast(inputs[0]->addr); - auto output = reinterpret_cast(outputs[0]->addr); - int size = inputs[0]->size / sizeof(float); - std::vector new_input(IntToSize(size), 0.0); + auto input = reinterpret_cast(inputs[0]->addr); + auto output = reinterpret_cast(outputs[0]->addr); + int size = inputs[0]->size / sizeof(T); + std::vector new_input(IntToSize(size), 0.0); std::vector transpose_axis; for (size_t i = 0; i < shape_.size(); ++i) { bool insert = true; @@ -90,7 +92,8 @@ bool ReduceCPUKernel::Launch(const std::vector &inputs, return true; } -void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) { +template +void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) { auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS); if (axis_addr->isa() || axis_addr->isa()) { std::vector attr_axis; @@ -128,10 +131,11 @@ void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) { } } -void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output) { +template +void ReduceCPUKernel::ConvertDataToOutput(const T *new_input, T *output) { if (reduce_type_ == kReduceTypeMax || reduce_type_ == kReduceTypeMin) { for (size_t i = 0; i < left_dims_; ++i) { - float value = new_input[i * stride_]; + T value = new_input[i * stride_]; for (size_t k = 0; k < stride_; ++k) { if (reduce_type_ == kReduceTypeMax) { if (value < new_input[i * stride_ + k]) { @@ -147,7 +151,7 @@ void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output) } } else if (reduce_type_ == kReduceTypeMean || reduce_type_ == kReduceTypeSum) { for (size_t i = 0; i < left_dims_; ++i) { - float value = 0.0; + T value = 0.0; for (size_t k = 0; k < stride_; ++k) { value += new_input[i * stride_ + k]; } @@ -162,8 +166,9 @@ void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output) } } -void ReduceCPUKernel::Transpose(const int size, const float *input, const std::vector &input_shape, - const std::vector &input_axis, const int shape_size, float *output) { +template +void ReduceCPUKernel::Transpose(const int size, const T *input, const std::vector &input_shape, + const std::vector &input_axis, const int shape_size, T *output) { int size_offset[kMaxDim]; size_offset[0] = size / SizeToInt(input_shape[0]); for (int i = 1; i < shape_size; ++i) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h index a53b9195d0..6f5b356275 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h @@ -23,6 +23,7 @@ namespace mindspore { namespace kernel { +template class ReduceCPUKernel : public CPUKernel { public: ReduceCPUKernel() = default; @@ -32,9 +33,9 @@ class ReduceCPUKernel : public CPUKernel { const std::vector &outputs) override; private: - void Transpose(const int size, const float *input, const std::vector &input_shape, - const std::vector &input_axis, const int shape_size, float *output); - void ConvertDataToOutput(const float *input, float *output); + void Transpose(const int size, const T *input, const std::vector &input_shape, + const std::vector &input_axis, const int shape_size, T *output); + void ConvertDataToOutput(const T *input, T *output); void CheckAxis(const CNodePtr &kernel_node); size_t reduce_type_ = 0; std::vector axis_; @@ -43,14 +44,41 @@ class ReduceCPUKernel : public CPUKernel { size_t stride_ = 1; }; -MS_REG_CPU_KERNEL(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ReduceCPUKernel); -MS_REG_CPU_KERNEL(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ReduceCPUKernel); -MS_REG_CPU_KERNEL(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ReduceCPUKernel); -MS_REG_CPU_KERNEL(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ReduceCPUKernel); +MS_REG_CPU_KERNEL_T(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ReduceCPUKernel, float); +MS_REG_CPU_KERNEL_T(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), + ReduceCPUKernel, double); +MS_REG_CPU_KERNEL_T(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ReduceCPUKernel, int32_t); +MS_REG_CPU_KERNEL_T(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), + ReduceCPUKernel, int64_t); + +MS_REG_CPU_KERNEL_T(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ReduceCPUKernel, float); +MS_REG_CPU_KERNEL_T(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), + ReduceCPUKernel, double); +MS_REG_CPU_KERNEL_T(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ReduceCPUKernel, int32_t); +MS_REG_CPU_KERNEL_T(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), + ReduceCPUKernel, int64_t); + +MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ReduceCPUKernel, float); +MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), + ReduceCPUKernel, double); +MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ReduceCPUKernel, int32_t); +MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), + ReduceCPUKernel, int64_t); + +MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ReduceCPUKernel, float); +MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), + ReduceCPUKernel, double); +MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ReduceCPUKernel, int32_t); +MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), + ReduceCPUKernel, int64_t); } // namespace kernel } // namespace mindspore #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_