!13214 [MS][LITE] add int ReduceMean func

From: @fuzhiye
Reviewed-by: @zhang_xue_tong,@hangangqiang
Signed-off-by: @zhang_xue_tong
pull/13214/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 203ee05893

@ -44,6 +44,49 @@ int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_d
  }
  return NNACL_OK;
}
int IntReduceMean(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                  int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
    return NNACL_NULL_PTR;
  }
  int i, j;
#ifdef ENABLE_NEON
  int block_mod = inner_size % C4NUM;
  int block_c4 = inner_size - block_mod;
#endif
  for (j = tid; j < outer_size; j += thread_num) {
    const int *outer_src = src_data + j * axis_size * inner_size;
    int *outer_dst = dst_data + j * inner_size;
    int k = 0;
#ifdef ENABLE_NEON
    for (; k < block_c4; k += C4NUM) {
      const int *inner_src = outer_src + k;
      int *inner_dst = outer_dst + k;
      int32x4_t tmp = {0, 0, 0, 0};
      for (i = 0; i < axis_size; i++) {
        tmp = vaddq_s32(tmp, vld1q_s32(inner_src + i * inner_size));
      }
      tmp[0] /= axis_size;
      tmp[1] /= axis_size;
      tmp[2] /= axis_size;
      tmp[3] /= axis_size;
      vst1q_s32(inner_dst, tmp);
    }
#endif
    for (; k < inner_size; k++) {
      const int *inner_src = outer_src + k;
      int *inner_dst = outer_dst + k;
      int tmp = 0;
      for (i = 0; i < axis_size; i++) {
        tmp += inner_src[i * inner_size];
      }
      *inner_dst = tmp / axis_size;
    }
  }
  return NNACL_OK;
}
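For context (not part of the patch): outer_size is the product of the dimensions before the reduced axis, axis_size is the reduced dimension itself, and inner_size is the product of the dimensions after it, so consecutive elements along the reduced axis sit inner_size apart in memory. A minimal standalone C++ sketch of the same indexing, using a hypothetical [2, 3, 4] int tensor reduced along axis 1 (with the same truncating integer division):

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical shape [2, 3, 4] reduced along axis 1:
  // outer_size = 2, axis_size = 3, inner_size = 4.
  const int outer_size = 2, axis_size = 3, inner_size = 4;
  std::vector<int> src(outer_size * axis_size * inner_size);
  for (int i = 0; i < static_cast<int>(src.size()); ++i) src[i] = i;  // values 0..23
  std::vector<int> dst(outer_size * inner_size, 0);

  for (int j = 0; j < outer_size; ++j) {
    const int *outer_src = src.data() + j * axis_size * inner_size;
    int *outer_dst = dst.data() + j * inner_size;
    for (int k = 0; k < inner_size; ++k) {
      int sum = 0;
      // Elements along the reduced axis are strided by inner_size.
      for (int i = 0; i < axis_size; ++i) sum += outer_src[k + i * inner_size];
      outer_dst[k] = sum / axis_size;  // integer mean, truncated toward zero
    }
  }

  for (int v : dst) printf("%d ", v);  // expected output: 4 5 6 7 16 17 18 19
  printf("\n");
  return 0;
}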
int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
@ -81,6 +124,7 @@ int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_da
  }
  return NNACL_OK;
}
int IntReduceSum(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                 int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
@ -118,6 +162,7 @@ int IntReduceSum(int outer_size, int inner_size, int axis_size, const int *src_d
  }
  return NNACL_OK;
}
int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
@ -139,6 +184,7 @@ int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_da
  }
  return NNACL_OK;
}
int IntReduceMax(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                 int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
@ -160,6 +206,7 @@ int IntReduceMax(int outer_size, int inner_size, int axis_size, const int *src_d
  }
  return NNACL_OK;
}
int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
@ -181,6 +228,7 @@ int ReduceMin(int outer_size, int inner_size, int axis_size, const float *src_da
  }
  return NNACL_OK;
}
int IntReduceMin(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                 int thread_num) {
  if (src_data == NULL || dst_data == NULL) {
@ -271,6 +319,7 @@ int IntReduceProd(int outer_size, int inner_size, int axis_size, const int *src_
  }
  return NNACL_OK;
}
int ReduceSumSquare(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
                    int thread_num) {
  if (src_data == NULL || dst_data == NULL) {

@ -24,6 +24,8 @@ extern "C" {
#endif
int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
               int thread_num);
int IntReduceMean(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,
                  int thread_num);
int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
              int thread_num);
int IntReduceSum(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid,

@ -40,54 +40,13 @@ using mindspore::schema::ReduceMode_ReduceSum;
using mindspore::schema::ReduceMode_ReduceSumSquare;
namespace mindspore::kernel {
int ReduceCPUKernel::Init() {
  auto ret = ReduceBaseCPUKernel::Init();
  if (ret != RET_OK) {
    return ret;
  }
-  switch (mode_) {
-    case static_cast<int>(ReduceMode_ReduceSum): {
-      reducer_ = ReduceSum;
-      int_reducer_ = IntReduceSum;
-      break;
-    }
-    case static_cast<int>(ReduceMode_ReduceMean): {
-      reducer_ = ReduceMean;
-      break;
-    }
-    case static_cast<int>(ReduceMode_ReduceMax): {
-      reducer_ = ReduceMax;
-      int_reducer_ = IntReduceMax;
-      break;
-    }
-    case static_cast<int>(ReduceMode_ReduceMin): {
-      reducer_ = ReduceMin;
-      int_reducer_ = IntReduceMin;
-      break;
-    }
-    case static_cast<int>(ReduceMode_ReduceProd): {
-      reducer_ = ReduceProd;
-      int_reducer_ = IntReduceProd;
-      break;
-    }
-    case static_cast<int>(ReduceMode_ReduceSumSquare): {
-      reducer_ = ReduceSum;
-      break;
-    }
-    case static_cast<int>(ReduceMode_ReduceASum): {
-      reducer_ = ReduceSum;
-      break;
-    }
-    case static_cast<int>(ReduceMode_ReduceAll): {
-      bool_reducer_ = ReduceAll;
-      break;
-    }
-    default:
-      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
-      return RET_ERROR;
-  }
+  InitialKernelList();
  if (!InferShapeDone()) {
    return RET_OK;
@ -98,19 +57,29 @@ int ReduceCPUKernel::Init() {
int ReduceCPUKernel::ReSize() { return ReduceBaseCPUKernel::ReSize(); }
int ReduceCPUKernel::CallReduceUnit(int task_id) {
-  int ret;
  if (data_type_ == kDataTypeFloat) {
-    ret = reducer_(outer_size_, inner_size_, axis_size_, static_cast<const float *>(src_data_),
-                   static_cast<float *>(dst_data_), task_id, context_->thread_num_);
+    if (!reducer_) {
+      MS_LOG(ERROR) << "function reducer_ is null.";
+      return RET_NULL_PTR;
+    }
+    reducer_(outer_size_, inner_size_, axis_size_, static_cast<const float *>(src_data_),
+             static_cast<float *>(dst_data_), task_id, context_->thread_num_);
  } else if (data_type_ == KDataTypeBool) {
-    ret = bool_reducer_(outer_size_, inner_size_, axis_size_, static_cast<const bool *>(src_data_),
-                        static_cast<bool *>(dst_data_), task_id, context_->thread_num_);
+    if (!bool_reducer_) {
+      MS_LOG(ERROR) << "function bool_reducer_ is null.";
+      return RET_NULL_PTR;
+    }
+    bool_reducer_(outer_size_, inner_size_, axis_size_, static_cast<const bool *>(src_data_),
+                  static_cast<bool *>(dst_data_), task_id, context_->thread_num_);
  } else {
-    ret = int_reducer_(outer_size_, inner_size_, axis_size_, static_cast<const int *>(src_data_),
-                       static_cast<int *>(dst_data_), task_id, context_->thread_num_);
+    if (!int_reducer_) {
+      MS_LOG(ERROR) << "function int_reducer_ is null.";
+      return RET_NULL_PTR;
+    }
+    int_reducer_(outer_size_, inner_size_, axis_size_, static_cast<const int *>(src_data_),
+                 static_cast<int *>(dst_data_), task_id, context_->thread_num_);
  }
-  return ret;
+  return RET_OK;
}
int ReduceImpl(void *cdata, int task_id) {
@ -143,7 +112,7 @@ int ReduceCPUKernel::Run() {
    if (i != static_cast<size_t>(num_axes_ - 1)) {
      dst_data_ = data_buffers_.at(i);
    } else {
-      dst_data_ = out_tensors_.at(0)->MutableData();
+      dst_data_ = out_tensors_.at(0)->data_c();
    }
    outer_size_ = outer_sizes_.at(i);
    inner_size_ = inner_sizes_.at(i);
@ -173,7 +142,7 @@ void ReduceCPUKernel::HandleASumAndSumSquare() {
    return;
  }
  int num = in_tensors_.at(0)->ElementsNum();
-  float *data = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
+  auto *data = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
  if (data == nullptr) {
    return;
  }
@ -197,7 +166,7 @@ int ReduceCPUKernel::CalculateCoeffOutput() {
  if (data_type_ != kDataTypeFloat) {
    return RET_ERROR;
  }
-  float *out_data = reinterpret_cast<float *>(out_tensor->MutableData());
+  auto *out_data = reinterpret_cast<float *>(out_tensor->data_c());
  if (out_data == nullptr) {
    return RET_NULL_PTR;
  }
@ -237,6 +206,26 @@ void ReduceCPUKernel::FreeTmpBuffer() {
  data_buffers_.clear();
}
void ReduceCPUKernel::InitialKernelList() {
  ReduceKernelList func_list[] = {{ReduceMode_ReduceSum, ReduceSum, IntReduceSum, nullptr},
                                  {ReduceMode_ReduceMean, ReduceMean, IntReduceMean, nullptr},
                                  {ReduceMode_ReduceMax, ReduceMax, IntReduceMax, nullptr},
                                  {ReduceMode_ReduceMin, ReduceMin, IntReduceMin, nullptr},
                                  {ReduceMode_ReduceProd, ReduceProd, IntReduceProd, nullptr},
                                  {ReduceMode_ReduceSumSquare, ReduceSum, IntReduceSum, nullptr},
                                  {ReduceMode_ReduceASum, ReduceSum, IntReduceSum, nullptr},
                                  {ReduceMode_ReduceAll, nullptr, nullptr, ReduceAll}};
  int list_len = sizeof(func_list) / sizeof(ReduceKernelList);
  for (int i = 0; i < list_len; ++i) {
    if (mode_ == func_list[i].type_) {
      reducer_ = func_list[i].float_func_;
      int_reducer_ = func_list[i].int_func_;
      bool_reducer_ = func_list[i].bool_func_;
      break;
    }
  }
}
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_ReduceFusion, LiteKernelCreator<ReduceCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_ReduceFusion, LiteKernelCreator<ReduceCPUKernel>)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_ReduceFusion, LiteKernelCreator<ReduceCPUKernel>)
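Design note (not part of the patch): InitialKernelList() replaces the mode switch removed from Init() above with a lookup table, so each ReduceMode row carries its float/int/bool kernels and unsupported combinations stay nullptr until the null checks in CallReduceUnit reject them. A small self-contained C++ sketch of the same table-driven dispatch pattern, using illustrative names only (not MindSpore Lite APIs):

#include <cstdio>

// Function-pointer types for the illustrative kernels.
typedef int (*IntFunc)(const int *src, int *dst, int n);
typedef int (*FloatFunc)(const float *src, float *dst, int n);

struct KernelEntry {
  int mode;            // reduce-mode id this row serves
  FloatFunc float_fn;  // float kernel, or nullptr if unsupported
  IntFunc int_fn;      // int kernel, or nullptr if unsupported
};

static int SumInt(const int *src, int *dst, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i) s += src[i];
  *dst = s;
  return 0;
}

static int SumFloat(const float *src, float *dst, int n) {
  float s = 0.0f;
  for (int i = 0; i < n; ++i) s += src[i];
  *dst = s;
  return 0;
}

int main() {
  // Rows play the role of func_list above; mode 1 has no int kernel.
  const KernelEntry table[] = {{0, SumFloat, SumInt}, {1, SumFloat, nullptr}};
  const int mode = 0;

  IntFunc chosen = nullptr;
  for (const KernelEntry &e : table) {
    if (e.mode == mode) {
      chosen = e.int_fn;
      break;
    }
  }
  if (chosen == nullptr) {  // mirrors the null checks in CallReduceUnit
    printf("mode %d has no int kernel\n", mode);
    return 1;
  }

  int src[] = {1, 2, 3, 4};
  int out = 0;
  chosen(src, &out, 4);
  printf("sum = %d\n", out);  // prints: sum = 10
  return 0;
}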

@ -26,21 +26,27 @@
using mindspore::schema::ReduceMode;
namespace mindspore::kernel {
-class ReduceCPUKernel : public ReduceBaseCPUKernel {
-  typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
-                         float *dst_data, const int tid, const int thread_num);
-  typedef int (*IntReducer)(const int outer_size, const int inner_size, const int axis_size, const int *src_data,
-                            int *dst_data, const int tid, const int thread_num);
-  typedef int (*BoolReducer)(const int outer_size, const int inner_size, const int axis_size, const bool *src_data,
-                             bool *dst_data, const int tid, const int thread_num);
+typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
+                       float *dst_data, const int tid, const int thread_num);
+typedef int (*IntReducer)(const int outer_size, const int inner_size, const int axis_size, const int *src_data,
+                          int *dst_data, const int tid, const int thread_num);
+typedef int (*BoolReducer)(const int outer_size, const int inner_size, const int axis_size, const bool *src_data,
+                           bool *dst_data, const int tid, const int thread_num);
+struct ReduceKernelList {
+  int type_;
+  Reducer float_func_;
+  IntReducer int_func_;
+  BoolReducer bool_func_;
+};
+class ReduceCPUKernel : public ReduceBaseCPUKernel {
 public:
  ReduceCPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : ReduceBaseCPUKernel(param, inputs, outputs, ctx) {
    reduce_param_ = reinterpret_cast<ReduceParameter *>(param);
  }
-  ~ReduceCPUKernel() {
+  ~ReduceCPUKernel() override {
    src_data_ = nullptr;
    dst_data_ = nullptr;
    reducer_ = nullptr;
@ -52,6 +58,9 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel {
  int Run() override;
  int CallReduceUnit(int task_id);
+ protected:
+  void InitialKernelList();
 private:
  ReduceParameter *reduce_param_;
  Reducer reducer_ = nullptr;
