!11520 Add type support for ReduceMean, ReduceMax, ReduceSum, ReduceMin

From: @wangrao124
Reviewed-by: @wuxuejian, @kisnwang
Signed-off-by: @wuxuejian
pull/11520/MERGE
Committed-by: mindspore-ci-bot (via Gitee)
commit afd88cdc21
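In outline, this change turns ReduceCPUKernel into a class template over the element type T, so the same max/min/sum/mean logic serves float32, float64, int32 and int64 inputs, and the kernel is then registered once per dtype. The standalone sketch below is illustrative only (simplified names, not the actual kernel source); it shows the kind of type-generic stride reduction the templated kernel performs after the reduced axes have been transposed to the back:

// Illustrative sketch only -- simplified names, not the MindSpore kernel.
// Reduces each contiguous block of `stride` elements down to one output
// element, for any arithmetic element type T.
#include <cstddef>

enum class ReduceOp { kMax, kMin, kSum, kMean };

template <typename T>
void ReduceStride(const T *input, T *output, std::size_t left_dims, std::size_t stride, ReduceOp op) {
  for (std::size_t i = 0; i < left_dims; ++i) {
    // Max/Min start from the first element of the block; Sum/Mean start from zero.
    T value = (op == ReduceOp::kMax || op == ReduceOp::kMin) ? input[i * stride] : static_cast<T>(0);
    for (std::size_t k = 0; k < stride; ++k) {
      const T v = input[i * stride + k];
      if (op == ReduceOp::kMax) {
        value = value < v ? v : value;
      } else if (op == ReduceOp::kMin) {
        value = v < value ? v : value;
      } else {
        value += v;  // kSum and kMean both accumulate.
      }
    }
    output[i] = (op == ReduceOp::kMean) ? static_cast<T>(value / static_cast<T>(stride)) : value;
  }
}

Because the element type is a template parameter, the size checks in Launch can use sizeof(T), and the registrations at the end of the header simply instantiate the kernel once per supported dtype.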

--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,7 +31,8 @@ const size_t kMaxDim = 100;
 static std::map<std::string, int> reduce_types_map_ = {
   {"ReduceMax", 1}, {"ReduceMean", 2}, {"ReduceSum", 3}, {"ReduceMin", 4}};
-void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) {
+template <typename T>
+void ReduceCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
@@ -59,18 +60,19 @@ void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   left_dims_ = left_dims_ / stride_;
 }
-bool ReduceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
+template <typename T>
+bool ReduceCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                              const std::vector<kernel::AddressPtr> & /*workspaces*/,
                              const std::vector<kernel::AddressPtr> &outputs) {
-  size_t out_float_size = left_dims_ * sizeof(float);
-  size_t in_float_size = stride_ * out_float_size;
-  if (inputs[0]->size != in_float_size || outputs[0]->size != out_float_size) {
+  size_t out_size = left_dims_ * sizeof(T);
+  size_t in_size = stride_ * out_size;
+  if (inputs[0]->size != in_size || outputs[0]->size != out_size) {
     MS_LOG(EXCEPTION) << "invalid input or output data size!";
   }
-  auto input = reinterpret_cast<float *>(inputs[0]->addr);
-  auto output = reinterpret_cast<float *>(outputs[0]->addr);
-  int size = inputs[0]->size / sizeof(float);
-  std::vector<float> new_input(IntToSize(size), 0.0);
+  auto input = reinterpret_cast<T *>(inputs[0]->addr);
+  auto output = reinterpret_cast<T *>(outputs[0]->addr);
+  int size = inputs[0]->size / sizeof(T);
+  std::vector<T> new_input(IntToSize(size), 0.0);
   std::vector<size_t> transpose_axis;
   for (size_t i = 0; i < shape_.size(); ++i) {
     bool insert = true;
@@ -90,7 +92,8 @@ bool ReduceCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   return true;
 }
-void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) {
+template <typename T>
+void ReduceCPUKernel<T>::CheckAxis(const CNodePtr &kernel_node) {
   auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS);
   if (axis_addr->isa<ValueTuple>() || axis_addr->isa<ValueList>()) {
     std::vector<int> attr_axis;
@@ -128,10 +131,11 @@ void ReduceCPUKernel::CheckAxis(const CNodePtr &kernel_node) {
   }
 }
-void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output) {
+template <typename T>
+void ReduceCPUKernel<T>::ConvertDataToOutput(const T *new_input, T *output) {
   if (reduce_type_ == kReduceTypeMax || reduce_type_ == kReduceTypeMin) {
     for (size_t i = 0; i < left_dims_; ++i) {
-      float value = new_input[i * stride_];
+      T value = new_input[i * stride_];
       for (size_t k = 0; k < stride_; ++k) {
         if (reduce_type_ == kReduceTypeMax) {
           if (value < new_input[i * stride_ + k]) {
@@ -147,7 +151,7 @@ void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output)
     }
   } else if (reduce_type_ == kReduceTypeMean || reduce_type_ == kReduceTypeSum) {
     for (size_t i = 0; i < left_dims_; ++i) {
-      float value = 0.0;
+      T value = 0.0;
       for (size_t k = 0; k < stride_; ++k) {
         value += new_input[i * stride_ + k];
       }
@@ -162,8 +166,9 @@ void ReduceCPUKernel::ConvertDataToOutput(const float *new_input, float *output)
   }
 }
-void ReduceCPUKernel::Transpose(const int size, const float *input, const std::vector<size_t> &input_shape,
-                                const std::vector<size_t> &input_axis, const int shape_size, float *output) {
+template <typename T>
+void ReduceCPUKernel<T>::Transpose(const int size, const T *input, const std::vector<size_t> &input_shape,
+                                   const std::vector<size_t> &input_axis, const int shape_size, T *output) {
   int size_offset[kMaxDim];
   size_offset[0] = size / SizeToInt(input_shape[0]);
   for (int i = 1; i < shape_size; ++i) {
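One C++ detail worth noting in the .cc changes above: once the class is a template, every out-of-class member definition has to repeat the template header and qualify the class name with <T>, and the template is only instantiated for the concrete types it is actually used (registered) with. A minimal sketch of that pattern, using hypothetical names rather than the MindSpore classes:

// Hypothetical names, shown only to illustrate the member-definition pattern.
#include <cstddef>
#include <cstdint>

template <typename T>
class SumKernel {
 public:
  T Launch(const T *input, std::size_t n) const;
};

// The definition repeats `template <typename T>` and qualifies with SumKernel<T>::,
// exactly as ReduceCPUKernel<T>::Launch and friends do above.
template <typename T>
T SumKernel<T>::Launch(const T *input, std::size_t n) const {
  T acc = static_cast<T>(0);
  for (std::size_t i = 0; i < n; ++i) {
    acc += input[i];
  }
  return acc;
}

// Explicit instantiations for the dtypes this PR registers; in the kernel, the
// per-dtype registration macros play the role of pinning down the concrete types.
template class SumKernel<float>;
template class SumKernel<double>;
template class SumKernel<int32_t>;
template class SumKernel<int64_t>;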

--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reduce_cpu_kernel.h
@@ -23,6 +23,7 @@
 namespace mindspore {
 namespace kernel {
+template <typename T>
 class ReduceCPUKernel : public CPUKernel {
  public:
   ReduceCPUKernel() = default;
@@ -32,9 +33,9 @@ class ReduceCPUKernel : public CPUKernel {
               const std::vector<AddressPtr> &outputs) override;

  private:
-  void Transpose(const int size, const float *input, const std::vector<size_t> &input_shape,
-                 const std::vector<size_t> &input_axis, const int shape_size, float *output);
-  void ConvertDataToOutput(const float *input, float *output);
+  void Transpose(const int size, const T *input, const std::vector<size_t> &input_shape,
+                 const std::vector<size_t> &input_axis, const int shape_size, T *output);
+  void ConvertDataToOutput(const T *input, T *output);
   void CheckAxis(const CNodePtr &kernel_node);
   size_t reduce_type_ = 0;
   std::vector<size_t> axis_;
@@ -43,14 +44,41 @@ class ReduceCPUKernel : public CPUKernel {
   size_t stride_ = 1;
 };
-MS_REG_CPU_KERNEL(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-                  ReduceCPUKernel);
-MS_REG_CPU_KERNEL(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-                  ReduceCPUKernel);
-MS_REG_CPU_KERNEL(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-                  ReduceCPUKernel);
-MS_REG_CPU_KERNEL(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-                  ReduceCPUKernel);
+MS_REG_CPU_KERNEL_T(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                    ReduceCPUKernel, float);
+MS_REG_CPU_KERNEL_T(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                    ReduceCPUKernel, double);
+MS_REG_CPU_KERNEL_T(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+                    ReduceCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(ReduceMean, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+                    ReduceCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                    ReduceCPUKernel, float);
+MS_REG_CPU_KERNEL_T(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                    ReduceCPUKernel, double);
+MS_REG_CPU_KERNEL_T(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+                    ReduceCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(ReduceMax, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+                    ReduceCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                    ReduceCPUKernel, float);
+MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                    ReduceCPUKernel, double);
+MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+                    ReduceCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+                    ReduceCPUKernel, int64_t);
+MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                    ReduceCPUKernel, float);
+MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
+                    ReduceCPUKernel, double);
+MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+                    ReduceCPUKernel, int32_t);
+MS_REG_CPU_KERNEL_T(ReduceMin, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+                    ReduceCPUKernel, int64_t);
 }  // namespace kernel
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_REDUCE_CPU_KERNEL_H_
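As a usage note on the registration block: each MS_REG_CPU_KERNEL_T line pairs a KernelAttr (the dtype the framework matches on) with the concrete C++ type the template is instantiated with. Extending the kernel to a further dtype would follow the same shape; the line below is a hypothetical illustration only, not part of this PR, and assumes a float16 element type and kNumberTypeFloat16 attribute are available in this codebase:

// Hypothetical example -- NOT part of this change; assumes float16 support exists.
MS_REG_CPU_KERNEL_T(ReduceSum, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
                    ReduceCPUKernel, float16);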
