floormod and Add of arithmetic support int type

pull/7278/head
zhaodezan 4 years ago
parent b80fa15918
commit eaaf92a534

@ -348,6 +348,38 @@ int ElementOptAdd(float *input0, float *input1, float *output, int element_size,
return NNACL_OK;
}
int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param) {
#ifdef ENABLE_NEON
int32x4_t vin0_opt = vdupq_n_s32(input0[0]);
int32x4_t vin1_opt = vdupq_n_s32(input1[0]);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= element_size - 4; index += C4NUM) {
int32x4_t vin1 = vld1q_s32(input1 + index);
int32x4_t vout = vaddq_s32(vin0_opt, vin1);
vst1q_s32(output + index, vout);
}
#endif
for (; index < element_size; index++) {
output[index] = input0[0] + input1[index];
}
} else {
#ifdef ENABLE_NEON
for (; index <= element_size - 4; index += C4NUM) {
int32x4_t vin0 = vld1q_s32(input0 + index);
int32x4_t vout = vaddq_s32(vin0, vin1_opt);
vst1q_s32(output + index, vout);
}
#endif
for (; index < element_size; index++) {
output[index] = input0[index] + input1[0];
}
}
return NNACL_OK;
}
int ElementOptAddRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
#ifdef ENABLE_NEON
float32x4_t vin0_opt = vdupq_n_f32(input0[0]);
@ -739,6 +771,13 @@ int ElementFloorMod(float *input0, float *input1, float *output, int element_siz
return NNACL_OK;
}
int ElementFloorModInt(int *input0, int *input1, int *output, int element_size) {
for (int i = 0; i < element_size; i++) {
output[i] = input0[i] - (input0[i] / input1[i]) * input1[i];
}
return NNACL_OK;
}
int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
int element_size, ArithmeticParameter *param) {
TileDimensions(input0, input1, tile_input0, tile_input1, param);
@ -752,6 +791,13 @@ int ElementFloorDiv(float *input0, float *input1, float *output, int element_siz
return NNACL_OK;
}
int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size) {
for (int i = 0; i < element_size; i++) {
output[i] = input0[i] / input1[i];
}
return NNACL_OK;
}
int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
int element_size, ArithmeticParameter *param) {
TileDimensions(input0, input1, tile_input0, tile_input1, param);

@ -27,6 +27,7 @@
extern "C" {
#endif
int ElementOptAdd(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param);
int ElementOptAddInt(int *input0, int *input1, int *output, int element_size, ArithmeticParameter *param);
int ElementOptAddRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param);
int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param);
int ElementOptSub(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param);
@ -87,10 +88,12 @@ int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *ti
int element_size, ArithmeticParameter *param);
int ElementFloorDiv(float *input0, float *input1, float *output, int element_size);
int ElementFloorDivInt(int *input0, int *input1, int *output, int element_size);
int BroadcastFloorDiv(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
int element_size, ArithmeticParameter *param);
int ElementFloorMod(float *input0, float *input1, float *output, int element_size);
int ElementFloorModInt(int *input0, int *input1, int *output, int element_size);
int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
int element_size, ArithmeticParameter *param);

@ -83,6 +83,7 @@ int ArithmeticCPUKernel::ReSize() {
default:
arithmeticParameter_->broadcasting_ = false;
arithmetic_opt_run_ = ElementOptAdd;
arithmetic_opt_run_int_ = ElementOptAddInt;
break;
}
break;
@ -299,6 +300,7 @@ kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::Tenso
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mul, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_Mul, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Add, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_Add, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Sub, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Div, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_LogicalAnd, CpuArithmeticFp32KernelCreator)
@ -307,6 +309,8 @@ REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Maximum, CpuArithmeticFp32Ker
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Minimum, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_FloorDiv, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_FloorMod, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_FloorDiv, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_FloorMod, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SquaredDifference, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Equal, CpuArithmeticFp32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_NotEqual, CpuArithmeticFp32KernelCreator)

@ -125,9 +125,11 @@ class ArithmeticCPUKernel : public LiteKernel {
break;
case PrimitiveType_FloorDiv:
arithmetic_run_ = ElementFloorDiv;
arithmetic_run_int_ = ElementFloorDivInt;
break;
case PrimitiveType_FloorMod:
arithmetic_run_ = ElementFloorMod;
arithmetic_run_int_ = ElementFloorModInt;
break;
case PrimitiveType_Equal:
arithmetic_run_ = ElementEqual;

Loading…
Cancel
Save