diff --git a/mindspore/lite/nnacl/adder.h b/mindspore/lite/nnacl/adder.h index 1399510888..00c92796f3 100644 --- a/mindspore/lite/nnacl/adder.h +++ b/mindspore/lite/nnacl/adder.h @@ -16,9 +16,7 @@ #ifndef MINDSPORE_LITE_NNACL_ADDER_H_ #define MINDSPORE_LITE_NNACL_ADDER_H_ -#include #include "nnacl/op_base.h" -#include "nnacl/quantization/fixed_point.h" typedef struct AdderParameter { OpParameter op_parameter_; diff --git a/mindspore/lite/nnacl/flatten.c b/mindspore/lite/nnacl/arithmetic.c similarity index 50% rename from mindspore/lite/nnacl/flatten.c rename to mindspore/lite/nnacl/arithmetic.c index 4f9936741a..c595a37637 100644 --- a/mindspore/lite/nnacl/flatten.c +++ b/mindspore/lite/nnacl/arithmetic.c @@ -13,9 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "nnacl/flatten.h" -#include <string.h> -void Flatten(const void *input, void *output, const FlattenParameter *flatten_param) { - memcpy(output, input, flatten_param->size); +#include "nnacl/arithmetic.h" + +void CalcMultiplesAndStrides(ArithmeticParameter *param) { + for (size_t i = 0; i < param->ndim_; i++) { + NNACL_ASSERT(param->in_shape0_[i] != 0); + NNACL_ASSERT(param->in_shape1_[i] != 0); + param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i]; + param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i]; + } + // calculate strides + ComputeStrides(param->in_shape0_, param->in_strides0_, param->ndim_); + ComputeStrides(param->in_shape1_, param->in_strides1_, param->ndim_); + ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_); } diff --git a/mindspore/lite/nnacl/flatten.h b/mindspore/lite/nnacl/arithmetic.h similarity index 53% rename from mindspore/lite/nnacl/flatten.h rename to mindspore/lite/nnacl/arithmetic.h index 8409b8dd13..5b6babee17 100644 --- a/mindspore/lite/nnacl/flatten.h +++ b/mindspore/lite/nnacl/arithmetic.h @@ -13,23 +13,41 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ -#ifndef MINDSPORE_LITE_NNACL_FLATTEN_H_ -#define MINDSPORE_LITE_NNACL_FLATTEN_H_ + +#ifndef MINDSPORE_LITE_NNACL_ARITHMETIC_H_ +#define MINDSPORE_LITE_NNACL_ARITHMETIC_H_ + #include "nnacl/op_base.h" +#include "nnacl/common_func.h" +#include "nnacl/nnacl_utils.h" -typedef struct FlattenParameter { - // Primitive parameter +typedef struct ArithmeticParameter { OpParameter op_parameter_; - // other parameter - int size; -} FlattenParameter; + bool broadcasting_; + size_t ndim_; + int activation_type_; + int in_shape0_[10]; + int in_elements_num0_; + int in_shape1_[10]; + int in_elements_num1_; + + int out_shape_[10]; + int out_elements_num_; + + int in_strides0_[10]; + int in_strides1_[10]; + int out_strides_[10]; + + int multiples0_[10]; + int multiples1_[10]; +} ArithmeticParameter; #ifdef __cplusplus extern "C" { #endif -void Flatten(const void *input, void *output, const FlattenParameter *flatten_param); +void CalcMultiplesAndStrides(ArithmeticParameter *param); #ifdef __cplusplus } #endif -#endif // MINDSPORE_LITE_NNACL_FLATTEN_H_ +#endif // MINDSPORE_LITE_NNACL_ARITHMETIC_H_ diff --git a/mindspore/lite/nnacl/arithmetic_common.c b/mindspore/lite/nnacl/arithmetic_common.c deleted file mode 100644 index 47ff029e8e..0000000000 --- a/mindspore/lite/nnacl/arithmetic_common.c +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#include "nnacl/arithmetic_common.h" -#include "nnacl/nnacl_utils.h" - -void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, - const int *inStrides, const int *outStrides, const int *multiple) { - int srcDimSize = inShape[dim]; - if (dim == ndim - 1) { - for (int i = 0; i < multiple[dim]; i++) { - memcpy(outData, inData, srcDimSize * sizeof(float)); - outData += srcDimSize; - } - return; - } - for (size_t i = 0; i < srcDimSize; i++) { - for (size_t j = 0; j < multiple[dim]; j++) { - TileOneDimension(inData + inStrides[dim] * i, outData + outStrides[dim] * (i + j * srcDimSize), dim + 1, ndim, - inShape, inStrides, outStrides, multiple); - } - } -} - -void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape, - const int *inStrides, const int *outStrides, const int *multiple) { - int srcDimSize = inShape[dim]; - if (dim == ndim - 1) { - for (int i = 0; i < multiple[dim]; i++) { - memcpy(outData, inData, srcDimSize * sizeof(uint8_t)); - outData += srcDimSize; - } - return; - } - for (size_t i = 0; i < srcDimSize; i++) { - for (size_t j = 0; j < multiple[dim]; j++) { - TileOneDimensionUint8(inData + inStrides[dim] * i, outData + outStrides[dim] * (i + j * srcDimSize), dim + 1, - ndim, inShape, inStrides, outStrides, multiple); - } - } -} - -void ComputeStrides(const int *shape, int *strides, const int ndim) { - int stride = 1; - for (int i = ndim - 1; i >= 0; i--) { - strides[i] = stride; - stride *= shape[i]; - } -} - -void CalcMultiplesAndStrides(ArithmeticParameter *param) { - NNACL_ASSERT(param->in_shape0_[i] != 0); - NNACL_ASSERT(param->in_shape1_[i] != 0); - for (size_t i = 0; i < param->ndim_; i++) { - param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i]; - param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i]; - } - // cal strides - ComputeStrides(param->in_shape0_, param->in_strides0_, param->ndim_); - ComputeStrides(param->in_shape1_, param->in_strides1_, param->ndim_); - ComputeStrides(param->out_shape_, param->out_strides_, param->ndim_); -} - -void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1, - ArithmeticParameter *param) { - CalcMultiplesAndStrides(param); - TileOneDimension(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, - param->multiples0_); - TileOneDimension(data1, tile_data1, 0, param->ndim_, param->in_shape1_, param->in_strides1_, param->out_strides_, - param->multiples1_); -} - -void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, - ArithmeticParameter *param) { - CalcMultiplesAndStrides(param); - TileOneDimensionUint8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, - param->multiples0_); - TileOneDimensionUint8(data1, tile_data1, 0, param->ndim_, param->in_shape1_, param->in_strides1_, param->out_strides_, - param->multiples1_); -} - -void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, - ArithmeticParameter *param) { - CalcMultiplesAndStrides(param); - TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_, param->in_shape0_, - param->in_strides0_, param->out_strides_, param->multiples0_); - TileOneDimensionUint8((uint8_t *)(data1), (uint8_t *)(tile_data1), 0, param->ndim_, param->in_shape1_, - param->in_strides1_, param->out_strides_, param->multiples1_); 
-} diff --git a/mindspore/lite/nnacl/arithmetic_common.h b/mindspore/lite/nnacl/arithmetic_common.h deleted file mode 100644 index d9e08ad46e..0000000000 --- a/mindspore/lite/nnacl/arithmetic_common.h +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_LITE_NNACL_ARITHMETIC_COMMON_H_ -#define MINDSPORE_LITE_NNACL_ARITHMETIC_COMMON_H_ - -#ifdef ENABLE_NEON -#include <arm_neon.h> -#endif -#include <string.h> -#include "nnacl/op_base.h" -#include "nnacl/arithmetic_common.h" - -typedef struct ArithmeticParameter { - OpParameter op_parameter_; - bool broadcasting_; - size_t ndim_; - int activation_type_; - int in_shape0_[10]; - int in_elements_num0_; - int in_shape1_[10]; - int in_elements_num1_; - - int out_shape_[10]; - int out_elements_num_; - - int in_strides0_[10]; - int in_strides1_[10]; - int out_strides_[10]; - - int multiples0_[10]; - int multiples1_[10]; -} ArithmeticParameter; - -#ifdef __cplusplus -extern "C" { -#endif -void TileOneDimension(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, - const int *inStrides, const int *outStrides, const int *multiple); -void ComputeStrides(const int *shape, int *strides, const int ndim); - -void CalcMultiplesAndStrides(ArithmeticParameter *param); - -void TileOneDimensionUint8(const uint8_t *inData, uint8_t *outData, int dim, size_t ndim, const int *inShape, - const int *inStrides, const int *outStrides, const int *multiple); -void TileDimensions(const float *data0, const float *data1, float *tile_data0, float *tile_data1, - ArithmeticParameter *param); -void TileDimensionsUint8(const uint8_t *data0, const uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1, - ArithmeticParameter *param); -void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, - ArithmeticParameter *param); -#ifdef __cplusplus -} -#endif - -#endif // MINDSPORE_LITE_NNACL_ARITHMETIC_COMMON_H_ diff --git a/mindspore/lite/nnacl/arithmetic_parameter.h b/mindspore/lite/nnacl/arithmetic_parameter.h deleted file mode 100644 index d7d2b73a20..0000000000 --- a/mindspore/lite/nnacl/arithmetic_parameter.h +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef MINDSPORE_LITE_NNACL_ARTITHMETIC_PARAMETER_H_ -#define MINDSPORE_LITE_NNACL_ARTITHMETIC_PARAMETER_H_ - -#include "nnacl/op_attribute.h" - -#endif // MINDSPORE_LITE_NNACL_ARTITHMETIC_PARAMETER_H_ diff --git a/mindspore/lite/nnacl/batch_to_space.c b/mindspore/lite/nnacl/batch_to_space.c index 94bb4875fb..16ebbaf5ee 100644 --- a/mindspore/lite/nnacl/batch_to_space.c +++ b/mindspore/lite/nnacl/batch_to_space.c @@ -15,7 +15,6 @@ */ #include "nnacl/batch_to_space.h" -#include "nnacl/arithmetic_common.h" void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block, int data_size) { diff --git a/mindspore/lite/nnacl/batch_to_space.h b/mindspore/lite/nnacl/batch_to_space.h index 43b15bacac..2098aad52a 100644 --- a/mindspore/lite/nnacl/batch_to_space.h +++ b/mindspore/lite/nnacl/batch_to_space.h @@ -15,6 +15,8 @@ */ #ifndef MINDSPORE_LITE_NNACL_BATCH_TO_SPACE_H_ #define MINDSPORE_LITE_NNACL_BATCH_TO_SPACE_H_ + +#include <string.h> #include "nnacl/op_base.h" #define BATCH_TO_SPACE_BLOCK_SHAPE_SIZE 2 diff --git a/mindspore/lite/nnacl/common_func.h b/mindspore/lite/nnacl/common_func.h index 2173d11fbd..1e6dc30d27 100644 --- a/mindspore/lite/nnacl/common_func.h +++ b/mindspore/lite/nnacl/common_func.h @@ -63,6 +63,14 @@ static inline int GetStride(int *strides, const int *shape, int length) { return stride; } +inline void ComputeStrides(const int *shape, int *strides, const int ndim) { + int stride = 1; + for (int i = ndim - 1; i >= 0; i--) { + strides[i] = stride; + stride *= shape[i]; + } +} + #ifdef ENABLE_ARM64 void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size); void BiasAddRelu6(const float *bias, float *data, size_t oc4, size_t plan_size); diff --git a/mindspore/lite/nnacl/fp16/arithmetic_fp16.c b/mindspore/lite/nnacl/fp16/arithmetic_fp16.c index c707474d5c..bb77985601 100644 --- a/mindspore/lite/nnacl/fp16/arithmetic_fp16.c +++ b/mindspore/lite/nnacl/fp16/arithmetic_fp16.c @@ -16,7 +16,7 @@ #include "nnacl/fp16/arithmetic_fp16.h" #include -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" #include "nnacl/nnacl_utils.h" void TileOneDimensionFp16(float16_t *inData, float16_t *outData, int dim, size_t ndim, int *inShape, int *inStrides, diff --git a/mindspore/lite/nnacl/fp16/arithmetic_fp16.h b/mindspore/lite/nnacl/fp16/arithmetic_fp16.h index f27b9d25b5..34a7ce96da 100644 --- a/mindspore/lite/nnacl/fp16/arithmetic_fp16.h +++ b/mindspore/lite/nnacl/fp16/arithmetic_fp16.h @@ -20,7 +20,7 @@ #include <arm_neon.h> #endif #include "nnacl/op_base.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" #include "nnacl/errorcode.h" #ifdef __cplusplus @@ -107,7 +107,7 @@ int ElementMinimumFp16(float16_t *input0, float16_t *input1, float16_t *output, int ElementNotEqualFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); int ElementEqualFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); int ElementLessFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); -int ElementLessEqual(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); +int ElementLessEqualFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); int ElementGreaterFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); int ElementGreaterEqualFp16(float16_t *input0, float16_t *input1, uint8_t *output, int element_size); diff --git a/mindspore/lite/nnacl/fp16/stack_fp16.c
b/mindspore/lite/nnacl/fp16/stack_fp16.c index 122657d559..4172053f91 100644 --- a/mindspore/lite/nnacl/fp16/stack_fp16.c +++ b/mindspore/lite/nnacl/fp16/stack_fp16.c @@ -15,7 +15,7 @@ */ #include "nnacl/fp16/stack_fp16.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" size_t Fp16GetStackCopyNum(int axis, int *in_shape, size_t shape_size) { size_t one_input_size = 1; diff --git a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c index 1a9869269e..56e4ee5c13 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.c +++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.c @@ -20,1230 +20,995 @@ #define ACCURACY_DATA 0.00000001 -int ElementOptMul(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptMul(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vmulq_f32(vin0_opt, vin1); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[0] * input1[index]; + for (; index < size; index++) { + out[index] = in0[0] * in1[index]; } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t vout = vmulq_f32(vin0, vin1_opt); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] * input1[0]; + for (; index < size; index++) { + out[index] = in0[index] * in1[0]; } } return NNACL_OK; } -int ElementOptMulRelu(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptMulRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); float32x4_t zeros = vdupq_n_f32(0.0f); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vmaxq_f32(vmulq_f32(vin0_opt, vin1), zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMAX(input0[0] * input1[index], 0); + for (; index < size; index++) { + out[index] = MSMAX(in0[0] * in1[index], 0); } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t 
vout = vmaxq_f32(vmulq_f32(vin0, vin1_opt), zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMAX(input0[index] * input1[0], 0); + for (; index < size; index++) { + out[index] = MSMAX(in0[index] * in1[0], 0); } } return NNACL_OK; } -int ElementOptMulRelu6(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptMulRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); float32x4_t zeros = vdupq_n_f32(0.0f); float32x4_t bounds = vdupq_n_f32(6.0f); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vmulq_f32(vin0_opt, vin1), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[0] * input1[index], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[0] * in1[index], 0), 6); } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vmulq_f32(vin0, vin1_opt), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] * input1[0], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] * in1[0], 0), 6); } } return NNACL_OK; } -int ElementOptMulInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptMulInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - int32x4_t vin0_opt = vdupq_n_s32(input0[0]); - int32x4_t vin1_opt = vdupq_n_s32(input1[0]); + int32x4_t vin0_opt = vdupq_n_s32(in0[0]); + int32x4_t vin1_opt = vdupq_n_s32(in1[0]); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vmulq_s32(vin0_opt, vin1); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[0] * input1[index]; + for (; index < size; index++) { + out[index] = in0[0] * in1[index]; } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); int32x4_t vout = vmulq_s32(vin0, vin1_opt); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] * input1[0]; + for (; index < size; index++) 
{ + out[index] = in0[index] * in1[0]; } } return NNACL_OK; } -int ElementOptMulReluInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptMulReluInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - int32x4_t vin0_opt = vdupq_n_s32(input0[0]); - int32x4_t vin1_opt = vdupq_n_s32(input1[0]); + int32x4_t vin0_opt = vdupq_n_s32(in0[0]); + int32x4_t vin1_opt = vdupq_n_s32(in1[0]); int32x4_t zeros = vdupq_n_s32(0); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vmaxq_s32(vmulq_s32(vin0_opt, vin1), zeros); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMAX(input0[0] * input1[index], 0); + for (; index < size; index++) { + out[index] = MSMAX(in0[0] * in1[index], 0); } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); int32x4_t vout = vmaxq_s32(vmulq_s32(vin0, vin1_opt), zeros); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMAX(input0[index] * input1[0], 0); + for (; index < size; index++) { + out[index] = MSMAX(in0[index] * in1[0], 0); } } return NNACL_OK; } -int ElementOptMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptMulRelu6Int(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - int32x4_t vin0_opt = vdupq_n_s32(input0[0]); - int32x4_t vin1_opt = vdupq_n_s32(input1[0]); + int32x4_t vin0_opt = vdupq_n_s32(in0[0]); + int32x4_t vin1_opt = vdupq_n_s32(in1[0]); int32x4_t zeros = vdupq_n_s32(0); int32x4_t bounds = vdupq_n_s32(6); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vminq_s32(vmaxq_s32(vmulq_s32(vin0_opt, vin1), zeros), bounds); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[0] * input1[index], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[0] * in1[index], 0), 6); } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); int32x4_t vout = vminq_s32(vmaxq_s32(vmulq_s32(vin0, vin1_opt), zeros), bounds); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] * input1[0], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] * in1[0], 0), 6); } } return NNACL_OK; } -int ElementOptSub(const float *input0, const float *input1, float *output, const int element_size, - const 
ArithmeticParameter *param) { +int ElementOptSub(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vsubq_f32(vin0_opt, vin1); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[0] - input1[index]; + for (; index < size; index++) { + out[index] = in0[0] - in1[index]; } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t vout = vsubq_f32(vin0, vin1_opt); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] - input1[0]; + for (; index < size; index++) { + out[index] = in0[index] - in1[0]; } } return NNACL_OK; } -int ElementOptSubInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptSubInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - int32x4_t vin0_opt = vdupq_n_s32(input0[0]); - int32x4_t vin1_opt = vdupq_n_s32(input1[0]); + int32x4_t vin0_opt = vdupq_n_s32(in0[0]); + int32x4_t vin1_opt = vdupq_n_s32(in1[0]); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vsubq_s32(vin0_opt, vin1); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[0] - input1[index]; + for (; index < size; index++) { + out[index] = in0[0] - in1[index]; } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); int32x4_t vout = vsubq_s32(vin0, vin1_opt); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] - input1[0]; + for (; index < size; index++) { + out[index] = in0[index] - in1[0]; } } return NNACL_OK; } -int ElementOptSubRelu(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptSubRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); float32x4_t zeros = vdupq_n_f32(0.0f); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 
4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vmaxq_f32(vsubq_f32(vin0_opt, vin1), zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMAX(input0[0] - input1[index], 0); + for (; index < size; index++) { + out[index] = MSMAX(in0[0] - in1[index], 0); } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t vout = vmaxq_f32(vsubq_f32(vin0, vin1_opt), zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMAX(input0[index] - input1[0], 0); + for (; index < size; index++) { + out[index] = MSMAX(in0[index] - in1[0], 0); } } return NNACL_OK; } -int ElementOptSubRelu6(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptSubRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); float32x4_t zeros = vdupq_n_f32(0.0f); float32x4_t bounds = vdupq_n_f32(6.0f); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0_opt, vin1), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[0] - input1[index], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[0] - in1[index], 0), 6); } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0, vin1_opt), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] - input1[0], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] - in1[0], 0), 6); } } return NNACL_OK; } -int ElementOptAdd(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptAdd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); 
float32x4_t vout = vaddq_f32(vin0_opt, vin1); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[0] + input1[index]; + for (; index < size; index++) { + out[index] = in0[0] + in1[index]; } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t vout = vaddq_f32(vin0, vin1_opt); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] + input1[0]; + for (; index < size; index++) { + out[index] = in0[index] + in1[0]; } } return NNACL_OK; } -int ElementOptAddInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptAddInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - int32x4_t vin0_opt = vdupq_n_s32(input0[0]); - int32x4_t vin1_opt = vdupq_n_s32(input1[0]); + int32x4_t vin0_opt = vdupq_n_s32(in0[0]); + int32x4_t vin1_opt = vdupq_n_s32(in1[0]); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vaddq_s32(vin0_opt, vin1); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[0] + input1[index]; + for (; index < size; index++) { + out[index] = in0[0] + in1[index]; } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); int32x4_t vout = vaddq_s32(vin0, vin1_opt); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] + input1[0]; + for (; index < size; index++) { + out[index] = in0[index] + in1[0]; } } return NNACL_OK; } -int ElementOptAddRelu(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptAddRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); float32x4_t zeros = vdupq_n_f32(0.0f); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vmaxq_f32(vaddq_f32(vin0_opt, vin1), zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMAX(input0[0] + input1[index], 0); + for (; index < size; index++) { + out[index] = MSMAX(in0[0] + in1[index], 0); } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index 
<= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t vout = vmaxq_f32(vaddq_f32(vin0, vin1_opt), zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMAX(input0[index] + input1[0], 0); + for (; index < size; index++) { + out[index] = MSMAX(in0[index] + in1[0], 0); } } return NNACL_OK; } -int ElementOptAddRelu6(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptAddRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { #ifdef ENABLE_NEON - float32x4_t vin0_opt = vdupq_n_f32(input0[0]); - float32x4_t vin1_opt = vdupq_n_f32(input1[0]); + float32x4_t vin0_opt = vdupq_n_f32(in0[0]); + float32x4_t vin1_opt = vdupq_n_f32(in1[0]); float32x4_t zeros = vdupq_n_f32(0.0f); float32x4_t bounds = vdupq_n_f32(6.0f); #endif int index = 0; if (param->in_elements_num0_ == 1) { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0_opt, vin1), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[0] + input1[index], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[0] + in1[index], 0), 6); } } else { #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0, vin1_opt), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] + input1[0], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] + in1[0], 0), 6); } } return NNACL_OK; } -int ElementOptDiv(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptDiv(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { - for (int index = 0; index < element_size; index++) { - output[index] = input0[0] / input1[index]; + for (int index = 0; index < size; index++) { + out[index] = in0[0] / in1[index]; } } else { - if (input1[0] == 0) { + if (in1[0] == 0) { return NNACL_ERRCODE_DIVISOR_ZERO; } - for (int index = 0; index < element_size; index++) { - output[index] = input0[index] / input1[0]; + for (int index = 0; index < size; index++) { + out[index] = in0[index] / in1[0]; } } return NNACL_OK; } -int ElementOptDivRelu(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptDivRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { - for (int index = 0; index < element_size; index++) { - output[index] = input0[0] / input1[index]; - output[index] = output[index] > 0 ? output[index] : 0; + for (int index = 0; index < size; index++) { + out[index] = in0[0] / in1[index]; + out[index] = out[index] > 0 ? 
out[index] : 0; } } else { - for (int index = 0; index < element_size; index++) { - output[index] = input0[index] / input1[0]; - output[index] = output[index] > 0 ? output[index] : 0; + for (int index = 0; index < size; index++) { + out[index] = in0[index] / in1[0]; + out[index] = out[index] > 0 ? out[index] : 0; } } return NNACL_OK; } -int ElementOptDivRelu6(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptDivRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { - for (int index = 0; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[0] / input1[index], 0), 6); + for (int index = 0; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[0] / in1[index], 0), 6); } } else { - for (int index = 0; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] / input1[0], 0), 6); + for (int index = 0; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] / in1[0], 0), 6); } } return NNACL_OK; } -int ElementOptDivInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptDivInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { - for (int index = 0; index < element_size; index++) { - output[index] = input0[0] / input1[index]; + for (int index = 0; index < size; index++) { + out[index] = in0[0] / in1[index]; } } else { - if (input1[0] == 0) { + if (in1[0] == 0) { return NNACL_ERRCODE_DIVISOR_ZERO; } - for (int index = 0; index < element_size; index++) { - output[index] = input0[index] / input1[0]; + for (int index = 0; index < size; index++) { + out[index] = in0[index] / in1[0]; } } return NNACL_OK; } -int ElementMul(const float *input0, const float *input1, float *output, const int element_size) { +int BroadcastAdd(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, + ArithmeticParameter *param) { + TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param); + return ElementAdd(tile_in0, tile_in1, out, size); +} + +int BroadcastMul(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, + ArithmeticParameter *param) { + TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param); + return ElementMul(tile_in0, tile_in1, out, size); +} + +int ElementMul(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vmulq_f32(vin0, vin1); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] * input1[index]; + for (; index < size; index++) { + out[index] = in0[index] * in1[index]; } return NNACL_OK; } -int ElementMulRelu(const float *input0, const float *input1, float *output, const int element_size) { +int ElementMulRelu(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON float32x4_t zeros = vdupq_n_f32(0.0f); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - 
float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vmulq_f32(vin0, vin1); vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - float res = input0[index] * input1[index]; - output[index] = res > 0 ? res : 0; + for (; index < size; index++) { + float res = in0[index] * in1[index]; + out[index] = res > 0 ? res : 0; } return NNACL_OK; } -int ElementMulRelu6(const float *input0, const float *input1, float *output, const int element_size) { +int ElementMulRelu6(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON float32x4_t zeros = vdupq_n_f32(0.0f); float32x4_t bounds = vdupq_n_f32(6.0f); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vmulq_f32(vin0, vin1), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] * input1[index], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] * in1[index], 0), 6); } return NNACL_OK; } -int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size) { +int ElementMulInt(const int *in0, const int *in1, int *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vmulq_s32(vin0, vin1); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] * input1[index]; + for (; index < size; index++) { + out[index] = in0[index] * in1[index]; } return NNACL_OK; } -int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size) { +int ElementMulReluInt(const int *in0, const int *in1, int *out, int size) { int index = 0; #ifdef ENABLE_NEON int32x4_t zeros = vdupq_n_s32(0); - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vmulq_s32(vin0, vin1); vout = vbslq_s32(vcgtq_s32(vout, zeros), vout, zeros); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - float res = input0[index] * input1[index]; - output[index] = res > 0 ? res : 0; + for (; index < size; index++) { + int res = in0[index] * in1[index]; + out[index] = res > 0 ?
res : 0; } return NNACL_OK; } -int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size) { +int ElementMulRelu6Int(const int *in0, const int *in1, int *out, int size) { int index = 0; #ifdef ENABLE_NEON int32x4_t zeros = vdupq_n_s32(0); int32x4_t bounds = vdupq_n_s32(6); - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vminq_s32(vmaxq_s32(vmulq_s32(vin0, vin1), zeros), bounds); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] * input1[index], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] * in1[index], 0), 6); } return NNACL_OK; } -int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementMul(tile_input0, tile_input1, output, element_size); -} - -int ElementAdd(const float *input0, const float *input1, float *output, const int element_size) { +int ElementAdd(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vaddq_f32(vin0, vin1); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] + input1[index]; + for (; index < size; index++) { + out[index] = in0[index] + in1[index]; } return NNACL_OK; } -int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size) { +int ElementAddRelu(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON float32x4_t zeros = vdupq_n_f32(0.0f); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vaddq_f32(vin0, vin1); vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - float res = input0[index] + input1[index]; - output[index] = res > 0 ? res : 0; + for (; index < size; index++) { + float res = in0[index] + in1[index]; + out[index] = res > 0 ? 
res : 0; } return NNACL_OK; } -int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size) { +int ElementAddRelu6(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON float32x4_t zeros = vdupq_n_f32(0.0f); float32x4_t bounds = vdupq_n_f32(6.0f); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0, vin1), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] + input1[index], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] + in1[index], 0), 6); } return NNACL_OK; } -int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size) { +int ElementAddInt(const int *in0, const int *in1, int *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vaddq_s32(vin0, vin1); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] + input1[index]; + for (; index < size; index++) { + out[index] = in0[index] + in1[index]; } return NNACL_OK; } -int ElementAddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = input0[i] + input1[i]; - } - return NNACL_OK; -} - -int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementAdd(tile_input0, tile_input1, output, element_size); -} - -int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, - int8_t *output, int element_size, ArithmeticParameter *param) { - TileDimensionsInt8(input0, input1, tile_input0, tile_input1, param); - return ElementAddInt8(tile_input0, tile_input1, output, element_size); -} - -int ElementSub(const float *input0, const float *input1, float *output, const int element_size) { +int ElementSub(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vsubq_f32(vin0, vin1); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] - input1[index]; + for (; index < size; index++) { + out[index] = in0[index] - in1[index]; } return NNACL_OK; } -int ElementSubInt(const int *input0, const int *input1, int *output, const int element_size) { +int ElementSubInt(const int *in0, const int 
*in1, int *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vsubq_s32(vin0, vin1); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] - input1[index]; + for (; index < size; index++) { + out[index] = in0[index] - in1[index]; } return NNACL_OK; } -int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size) { +int ElementSubRelu(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON float32x4_t zeros = vdupq_n_f32(0.0f); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vsubq_f32(vin0, vin1); vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - float res = input0[index] - input1[index]; - output[index] = res > 0 ? res : 0; + for (; index < size; index++) { + float res = in0[index] - in1[index]; + out[index] = res > 0 ? res : 0; } return NNACL_OK; } -int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size) { +int ElementSubRelu6(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON float32x4_t zeros = vdupq_n_f32(0.0f); float32x4_t bounds = vdupq_n_f32(6.0f); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0, vin1), zeros), bounds); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = MSMIN(MSMAX(input0[index] - input1[index], 0), 6); + for (; index < size; index++) { + out[index] = MSMIN(MSMAX(in0[index] - in1[index], 0), 6); } return NNACL_OK; } -int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementSub(tile_input0, tile_input1, output, element_size); +int BroadcastDiv(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, + ArithmeticParameter *param) { + TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param); + return ElementDiv(tile_in0, tile_in1, out, size); } -int ElementDiv(const float *input0, const float *input1, float *output, const int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = input0[i] / input1[i]; +int ElementDiv(const float *in0, const float *in1, float *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = in0[i] / in1[i]; } return NNACL_OK; } -int ElementDivRelu(const float *input0, const float *input1, float *output, const
int element_size) { - for (int i = 0; i < element_size; i++) { - float res = input0[i] / input1[i]; - output[i] = res > 0 ? res : 0; +int ElementDivRelu(const float *in0, const float *in1, float *out, int size) { + for (int i = 0; i < size; i++) { + float res = in0[i] / in1[i]; + out[i] = res > 0 ? res : 0; } return NNACL_OK; } -int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = MSMIN(MSMAX(input0[i] / input1[i], 0), 6); +int ElementDivRelu6(const float *in0, const float *in1, float *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = MSMIN(MSMAX(in0[i] / in1[i], 0), 6); } return NNACL_OK; } -int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementDiv(tile_input0, tile_input1, output, element_size); -} - -int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i]; +int ElementFloorMod(const float *in0, const float *in1, float *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = in0[i] - floorf(in0[i] / in1[i]) * in1[i]; } return NNACL_OK; } -int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = input0[i] - (input0[i] / input1[i]) * input1[i]; +int ElementFloorModInt(const int *in0, const int *in1, int *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = in0[i] - (in0[i] / in1[i]) * in1[i]; } return NNACL_OK; } -int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementFloorMod(tile_input0, tile_input1, output, element_size); -} - -int ElementMod(const float *input0, const float *input1, float *output, const int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = fmod(input0[i], input1[i]); +int ElementMod(const float *in0, const float *in1, float *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = fmod(in0[i], in1[i]); } return NNACL_OK; } -int ElementModInt(const int *input0, const int *input1, int *output, const int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = fmod(input0[i], input1[i]); +int ElementModInt(const int *in0, const int *in1, int *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = fmod(in0[i], in1[i]); } return NNACL_OK; } -int ElementOptMod(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptMod(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { - for (int index = 0; index < element_size; index++) { - output[index] = fmod(input0[0], input1[index]); + for (int index = 0; index < size; index++) { + out[index] = fmod(in0[0], in1[index]); } } else { - for (int index = 0; index < element_size; index++) { - output[index] = fmod(input0[index], input1[0]); + for (int index = 0; index < size; index++) { + out[index] = fmod(in0[index], in1[0]); } } return NNACL_OK; } -int 
ElementOptModInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param) { +int ElementOptModInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { - for (int index = 0; index < element_size; index++) { - output[index] = fmod(input0[0], input1[index]); + for (int index = 0; index < size; index++) { + out[index] = in0[0] % in1[index]; } } else { - for (int index = 0; index < element_size; index++) { - output[index] = fmod(input0[index], input1[0]); + for (int index = 0; index < size; index++) { + out[index] = in0[index] % in1[0]; } } return NNACL_OK; } -int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = floorf(input0[i] / input1[i]); +int ElementFloorDiv(const float *in0, const float *in1, float *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = floorf(in0[i] / in1[i]); } return NNACL_OK; } -int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size) { - for (int i = 0; i < element_size; i++) { - output[i] = input0[i] / input1[i]; +int ElementFloorDivInt(const int *in0, const int *in1, int *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = in0[i] / in1[i]; } return NNACL_OK; } -int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementFloorDiv(tile_input0, tile_input1, output, element_size); -} - -int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size) { +int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON float32x4_t vtrue = vdupq_n_f32(1); float32x4_t vfalse = vdupq_n_f32(0); uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1)); uint32x4_t zeros = vdupq_n_u32(0); - for (; index <= element_size - 4; index += C4NUM) { - uint32x4_t vin0 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(input0 + index)), mask); - uint32x4_t vin1 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(input1 + index)), mask); + for (; index <= size - 4; index += C4NUM) { + uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask); + uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask); float32x4_t vout = vbslq_f32(vceqq_u32(vandq_u32(vin0, vin1), zeros), vfalse, vtrue); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = (float)((bool)(input0[index]) & (bool)(input1[index])); + for (; index < size; index++) { + out[index] = (float)((bool)(in0[index]) & (bool)(in1[index])); } return NNACL_OK; } -int ElementLogicalAndInt(const int *input0, const int *input1, int *output, const int element_size) { +int ElementLogicalAndInt(const int *in0, const int *in1, int *out, int size) { int index = 0; - for (; index < element_size; index++) { - output[index] = (int)((int)(input0[index]) & (int)(input1[index])); + for (; index < size; index++) { + out[index] = (int)((int)(in0[index]) & (int)(in1[index])); } return NNACL_OK; } -int ElementLogicalAndBool(const bool *input0, const bool *input1, bool *output, const int element_size) { +int ElementLogicalAndBool(const bool *in0, const bool *in1, bool
*out, int size) { int index = 0; - for (; index < element_size; index++) { - output[index] = (bool)((bool)(input0[index]) & (bool)(input1[index])); + for (; index < size; index++) { + out[index] = (bool)((bool)(in0[index]) & (bool)(in1[index])); } return NNACL_OK; } -int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size) { - ElementSub(input0, input1, output, element_size); - return ElementMul(output, output, output, element_size); -} - -int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1, - float *output, int element_size, ArithmeticParameter *param) { - BroadcastSub(input0, input1, tile_input0, tile_input1, output, element_size, param); - return ElementMul(output, output, output, element_size); +int ElementSquaredDifference(const float *in0, const float *in1, float *out, int size) { + ElementSub(in0, in1, out, size); + return ElementMul(out, out, out, size); } -int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementLogicalAnd(tile_input0, tile_input1, output, element_size); -} - -int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size) { +int ElementLogicalOr(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON float32x4_t vtrue = vdupq_n_f32(1); float32x4_t vfalse = vdupq_n_f32(0); uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1)); uint32x4_t zeros = vdupq_n_u32(0); - for (; index <= element_size - 4; index += C4NUM) { - uint32x4_t vin0 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(input0 + index)), mask); - uint32x4_t vin1 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(input1 + index)), mask); + for (; index <= size - 4; index += C4NUM) { + uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask); + uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask); float32x4_t vout = vbslq_f32(vceqq_u32(vorrq_u32(vin0, vin1), zeros), vfalse, vtrue); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = (float)((bool)(input0[index]) | (bool)(input1[index])); + for (; index < size; index++) { + out[index] = (float)((bool)(in0[index]) | (bool)(in1[index])); } return NNACL_OK; } -int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementLogicalOr(tile_input0, tile_input1, output, element_size); -} - -int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size) { +int ElementMaximum(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vmaxq_f32(vin0, vin1); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] >
input1[index] ? input0[index] : input1[index]; + for (; index < size; index++) { + out[index] = in0[index] > in1[index] ? in0[index] : in1[index]; } return NNACL_OK; } -int ElementMaximumInt(const int *input0, const int *input1, int *output, const int element_size) { +int ElementMaximumInt(const int *in0, const int *in1, int *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - int32x4_t vin0 = vld1q_s32(input0 + index); - int32x4_t vin1 = vld1q_s32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + int32x4_t vin0 = vld1q_s32(in0 + index); + int32x4_t vin1 = vld1q_s32(in1 + index); int32x4_t vout = vmaxq_s32(vin0, vin1); - vst1q_s32(output + index, vout); + vst1q_s32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = input0[index] > input1[index] ? input0[index] : input1[index]; + for (; index < size; index++) { + out[index] = in0[index] > in1[index] ? in0[index] : in1[index]; } return NNACL_OK; } -int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementMaximum(tile_input0, tile_input1, output, element_size); +int BroadcastMaximum(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, + ArithmeticParameter *param) { + TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param); + return ElementMaximum(tile_in0, tile_in1, out, size); } -int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size) { +int ElementMinimum(const float *in0, const float *in1, float *out, int size) { int index = 0; #ifdef ENABLE_NEON - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); + for (; index <= size - 4; index += C4NUM) { + float32x4_t vin0 = vld1q_f32(in0 + index); + float32x4_t vin1 = vld1q_f32(in1 + index); float32x4_t vout = vminq_f32(vin0, vin1); - vst1q_f32(output + index, vout); - } -#endif - for (; index < element_size; index++) { - output[index] = input0[index] > input1[index] ? 
input1[index] : input0[index]; - } - return NNACL_OK; -} - -int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementMinimum(tile_input0, tile_input1, output, element_size); -} - -float FloatNotEqualCheck(float in0, float in1) { - float tmp = in0 - in1; - if (tmp <= ACCURACY_DATA && tmp >= -ACCURACY_DATA) { - return (float)false; - } - return (float)true; -} - -int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size) { - int index = 0; -#ifdef ENABLE_NEON - float32x4_t vtrue = vdupq_n_f32(1); - float32x4_t vfalse = vdupq_n_f32(0); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); - float32x4_t vout = vbslq_f32(vceqq_f32(vin0, vin1), vfalse, vtrue); - vst1q_f32(output + index, vout); - } -#endif - for (; index < element_size; index++) { - output[index] = (float)(fabsf(input0[index] - input1[index]) > FLT_EPSILON); - } - return NNACL_OK; -} - -int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementNotEqual(tile_input0, tile_input1, output, element_size); -} - -float FloatEqualCheck(float in0, float in1) { - float tmp = in0 - in1; - if (tmp <= ACCURACY_DATA && tmp >= -ACCURACY_DATA) { - return (float)true; - } - return (float)false; -} - -int ElementEqual(const float *input0, const float *input1, float *output, const int element_size) { - int index = 0; -#ifdef ENABLE_NEON - float32x4_t vtrue = vdupq_n_f32(1); - float32x4_t vfalse = vdupq_n_f32(0); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); - float32x4_t vout = vbslq_f32(vceqq_f32(vin0, vin1), vtrue, vfalse); - vst1q_f32(output + index, vout); - } -#endif - for (; index < element_size; index++) { - output[index] = (float)(fabsf(input0[index] - input1[index]) <= FLT_EPSILON); - } - return NNACL_OK; -} - -int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementEqual(tile_input0, tile_input1, output, element_size); -} - -int ElementLess(const float *input0, const float *input1, float *output, const int element_size) { - int index = 0; -#ifdef ENABLE_NEON - float32x4_t vtrue = vdupq_n_f32(1); - float32x4_t vfalse = vdupq_n_f32(0); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); - float32x4_t vout = vbslq_f32(vcltq_f32(vin0, vin1), vtrue, vfalse); - vst1q_f32(output + index, vout); + vst1q_f32(out + index, vout); } #endif - for (; index < element_size; index++) { - output[index] = (float)(input0[index] < input1[index]); + for (; index < size; index++) { + out[index] = in0[index] > in1[index] ? 
in1[index] ? in1[index] : in0[index]; } return NNACL_OK; } -int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementLess(tile_input0, tile_input1, output, element_size); -} - -int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size) { - int index = 0; -#ifdef ENABLE_NEON - float32x4_t vtrue = vdupq_n_f32(1); - float32x4_t vfalse = vdupq_n_f32(0); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); - float32x4_t vout = vbslq_f32(vcleq_f32(vin0, vin1), vtrue, vfalse); - vst1q_f32(output + index, vout); - } -#endif - for (; index < element_size; index++) { - output[index] = (float)(input0[index] <= input1[index]); - } - return NNACL_OK; -} - -int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementLessEqual(tile_input0, tile_input1, output, element_size); -} - -int ElementGreater(const float *input0, const float *input1, float *output, const int element_size) { - int index = 0; -#ifdef ENABLE_NEON - float32x4_t vtrue = vdupq_n_f32(1); - float32x4_t vfalse = vdupq_n_f32(0); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); - float32x4_t vout = vbslq_f32(vcgtq_f32(vin0, vin1), vtrue, vfalse); - vst1q_f32(output + index, vout); - } -#endif - for (; index < element_size; index++) { - output[index] = (float)(input0[index] > input1[index]); - } - return NNACL_OK; -} - -int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementGreater(tile_input0, tile_input1, output, element_size); -} - -int ElementGreaterEqual(const float *input0, const float *input1, float *output, const int element_size) { - int index = 0; -#ifdef ENABLE_NEON - float32x4_t vtrue = vdupq_n_f32(1); - float32x4_t vfalse = vdupq_n_f32(0); - for (; index <= element_size - 4; index += C4NUM) { - float32x4_t vin0 = vld1q_f32(input0 + index); - float32x4_t vin1 = vld1q_f32(input1 + index); - float32x4_t vout = vbslq_f32(vcgeq_f32(vin0, vin1), vtrue, vfalse); - vst1q_f32(output + index, vout); - } -#endif - for (; index < element_size; index++) { - output[index] = (float)(input0[index] >= input1[index]); - } - return NNACL_OK; -} - -int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, - float *output, int element_size, ArithmeticParameter *param) { - TileDimensions(input0, input1, tile_input0, tile_input1, param); - return ElementGreaterEqual(tile_input0, tile_input1, output, element_size); -} - #undef ACCURACY_DATA #ifdef ENABLE_NNACL_INFER_SHAPE @@ -1304,3 +1069,30 @@ int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int * return NNACL_OK; } #endif + +void TileOneDimensionFp32(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, + const int *inStrides, const int *outStrides, const int *multiple) { + int srcDimSize = inShape[dim]; + if
(dim == ndim - 1) { + for (int i = 0; i < multiple[dim]; i++) { + memcpy(outData, inData, srcDimSize * sizeof(float)); + outData += srcDimSize; + } + return; + } + for (size_t i = 0; i < srcDimSize; i++) { + for (size_t j = 0; j < multiple[dim]; j++) { + TileOneDimensionFp32(inData + inStrides[dim] * i, outData + outStrides[dim] * (i + j * srcDimSize), dim + 1, ndim, + inShape, inStrides, outStrides, multiple); + } + } +} + +void TileDimensionsFp32(const float *data0, const float *data1, float *tile_data0, float *tile_data1, + ArithmeticParameter *param) { + CalcMultiplesAndStrides(param); + TileOneDimensionFp32(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, + param->multiples0_); + TileOneDimensionFp32(data1, tile_data1, 0, param->ndim_, param->in_shape1_, param->in_strides1_, param->out_strides_, + param->multiples1_); +} diff --git a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h index bf80d6a4d5..2d0496cfe5 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic_fp32.h +++ b/mindspore/lite/nnacl/fp32/arithmetic_fp32.h @@ -20,144 +20,97 @@ #include #endif #include "nnacl/op_base.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" #include "nnacl/errorcode.h" #ifdef __cplusplus extern "C" { #endif -int ElementOptAdd(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptAddInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptAddRelu(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptAddRelu6(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptSub(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptSubInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptSubRelu(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptSubRelu6(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptMul(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptMulRelu(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptMulRelu6(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptMulInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptMulReluInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptDiv(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptDivRelu(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int 
ElementOptDivRelu6(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptDivInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param); -int ElementMul(const float *input0, const float *input1, float *output, const int element_size); -int ElementMulRelu(const float *input0, const float *input1, float *output, const int element_size); -int ElementMulRelu6(const float *input0, const float *input1, float *output, const int element_size); -int ElementMulInt(const int *input0, const int *input1, int *output, const int element_size); -int ElementMulReluInt(const int *input0, const int *input1, int *output, const int element_size); -int ElementMulRelu6Int(const int *input0, const int *input1, int *output, const int element_size); -int BroadcastMul(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementAdd(const float *input0, const float *input1, float *output, const int element_size); -int ElementAddRelu(const float *input0, const float *input1, float *output, const int element_size); -int ElementAddRelu6(const float *input0, const float *input1, float *output, const int element_size); -int ElementAddInt(const int *input0, const int *input1, int *output, const int element_size); -int BroadcastAdd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); -int BroadcastAddInt8(const int8_t *input0, const int8_t *input1, int8_t *tile_input0, int8_t *tile_input1, - int8_t *output, int element_size, ArithmeticParameter *param); - -int ElementSub(const float *input0, const float *input1, float *output, const int element_size); -int ElementSubInt(const int *input0, const int *input1, int *output, const int element_size); -int ElementSubRelu(const float *input0, const float *input1, float *output, const int element_size); -int ElementSubRelu6(const float *input0, const float *input1, float *output, const int element_size); -int BroadcastSub(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementDiv(const float *input0, const float *input1, float *output, const int element_size); -int ElementDivRelu(const float *input0, const float *input1, float *output, const int element_size); -int ElementDivRelu6(const float *input0, const float *input1, float *output, const int element_size); -int BroadcastDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementLogicalAnd(const float *input0, const float *input1, float *output, const int element_size); -int ElementLogicalAndInt(const int *input0, const int *input1, int *output, const int element_size); -int ElementLogicalAndBool(const bool *input0, const bool *input1, bool *output, const int element_size); -int BroadcastLogicalAnd(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementLogicalOr(const float *input0, const float *input1, float *output, const int element_size); -int BroadcastLogicalOr(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter 
*param); - -int ElementMaximum(const float *input0, const float *input1, float *output, const int element_size); -int ElementMaximumInt(const int *input0, const int *input1, int *output, const int element_size); -int BroadcastMaximum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementMinimum(const float *input0, const float *input1, float *output, const int element_size); -int BroadcastMinimum(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementFloorDiv(const float *input0, const float *input1, float *output, const int element_size); -int ElementFloorDivInt(const int *input0, const int *input1, int *output, const int element_size); -int BroadcastFloorDiv(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementFloorMod(const float *input0, const float *input1, float *output, const int element_size); -int ElementFloorModInt(const int *input0, const int *input1, int *output, const int element_size); -int BroadcastFloorMod(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementMod(const float *input0, const float *input1, float *output, const int element_size); -int ElementModInt(const int *input0, const int *input1, int *output, const int element_size); -int ElementOptMod(const float *input0, const float *input1, float *output, const int element_size, - const ArithmeticParameter *param); -int ElementOptModInt(const int *input0, const int *input1, int *output, const int element_size, - const ArithmeticParameter *param); - -int ElementSquaredDifference(const float *input0, const float *input1, float *output, const int element_size); -int BroadcastSquaredDifference(const float *input0, const float *input1, float *tile_input0, float *tile_input1, - float *output, int element_size, ArithmeticParameter *param); - -int ElementNotEqual(const float *input0, const float *input1, float *output, const int element_size); - -int BroadcastNotEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementEqual(const float *input0, const float *input1, float *output, const int element_size); - -int BroadcastEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementLess(const float *input0, const float *input1, float *output, const int element_size); -int BroadcastLess(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementLessEqual(const float *input0, const float *input1, float *output, const int element_size); -int BroadcastLessEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementGreater(const float *input0, const float *input1, float *output, const int element_size); -int BroadcastGreater(const float *input0, const float *input1, float *tile_input0, float *tile_input1, float *output, - int element_size, ArithmeticParameter *param); - -int ElementGreaterEqual(const 
float *input0, const float *input1, float *output, const int element_size); -int BroadcastGreaterEqual(const float *input0, const float *input1, float *tile_input0, float *tile_input1, - float *output, int element_size, ArithmeticParameter *param); +void TileOneDimensionFp32(const float *inData, float *outData, int dim, size_t ndim, const int *inShape, + const int *inStrides, const int *outStrides, const int *multiple); +void TileDimensionsFp32(const float *data0, const float *data1, float *tile_data0, float *tile_data1, + ArithmeticParameter *param); + +/* Mul */ +int ElementMul(const float *in0, const float *in1, float *out, int size); +int ElementMulRelu(const float *in0, const float *in1, float *out, int size); +int ElementMulRelu6(const float *in0, const float *in1, float *out, int size); +int ElementMulInt(const int *in0, const int *in1, int *out, int size); +int ElementMulReluInt(const int *in0, const int *in1, int *out, int size); +int ElementMulRelu6Int(const int *in0, const int *in1, int *out, int size); +int ElementOptMul(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptMulRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptMulRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptMulInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); +int ElementOptMulReluInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); +int ElementOptMulRelu6Int(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); +int BroadcastMul(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, + ArithmeticParameter *param); + +/* Add */ +int ElementAdd(const float *in0, const float *in1, float *out, int size); +int ElementAddRelu(const float *in0, const float *in1, float *out, int size); +int ElementAddRelu6(const float *in0, const float *in1, float *out, int size); +int ElementAddInt(const int *in0, const int *in1, int *out, int size); +int ElementOptAdd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptAddInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); +int ElementOptAddRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptAddRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int BroadcastAdd(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, + ArithmeticParameter *param); + +/* Sub */ +int ElementSub(const float *in0, const float *in1, float *out, int size); +int ElementSubInt(const int *in0, const int *in1, int *out, int size); +int ElementSubRelu(const float *in0, const float *in1, float *out, int size); +int ElementSubRelu6(const float *in0, const float *in1, float *out, int size); +int ElementOptSub(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptSubRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptSubRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptSubInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); + +/* Div */ 
+int ElementDiv(const float *in0, const float *in1, float *out, int size); +int ElementDivRelu(const float *in0, const float *in1, float *out, int size); +int ElementDivRelu6(const float *in0, const float *in1, float *out, int size); +int ElementOptDiv(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptDivRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptDivRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptDivInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); +int BroadcastDiv(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size, + ArithmeticParameter *param); + +/* logical and */ +int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size); +int ElementLogicalAndInt(const int *in0, const int *in1, int *out, int size); +int ElementLogicalAndBool(const bool *in0, const bool *in1, bool *out, int size); + +/* logical or */ +int ElementLogicalOr(const float *in0, const float *in1, float *out, int size); + +/* Element Squared Difference */ +int ElementSquaredDifference(const float *in0, const float *in1, float *out, int size); + +/* max min */ +int ElementMaximum(const float *in0, const float *in1, float *out, int size); +int ElementMinimum(const float *in0, const float *in1, float *out, int size); +int ElementMaximumInt(const int *in0, const int *in1, int *out, int size); +int BroadcastMaximum(const float *in0, const float *in1, float *tile_input0, float *tile_input1, float *out, int size, + ArithmeticParameter *param); + +/* floor div */ +int ElementFloorDiv(const float *in0, const float *in1, float *out, int size); +int ElementFloorDivInt(const int *in0, const int *in1, int *out, int size); + +/* floor mod */ +int ElementFloorMod(const float *in0, const float *in1, float *out, int size); +int ElementFloorModInt(const int *in0, const int *in1, int *out, int size); + +/* mod */ +int ElementMod(const float *in0, const float *in1, float *out, int size); +int ElementModInt(const int *in0, const int *in1, int *out, int size); +int ElementOptMod(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param); +int ElementOptModInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param); #ifdef ENABLE_NNACL_INFER_SHAPE int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format, diff --git a/mindspore/lite/nnacl/fp32/space_to_batch_fp32.c b/mindspore/lite/nnacl/fp32/space_to_batch_fp32.c index 3016b95eac..2f0077ba77 100644 --- a/mindspore/lite/nnacl/fp32/space_to_batch_fp32.c +++ b/mindspore/lite/nnacl/fp32/space_to_batch_fp32.c @@ -14,7 +14,6 @@ * limitations under the License. 
*/ #include "nnacl/fp32/space_to_batch_fp32.h" -#include "nnacl/arithmetic_common.h" void DoSpaceToBatch(const float *input, float *output, const int *in_shape, const int *out_shape, const int *in_stride, const int *out_stride, const int *blocks, const int *paddings, int thread, int task_id) { diff --git a/mindspore/lite/nnacl/fp32/space_to_batch_fp32.h b/mindspore/lite/nnacl/fp32/space_to_batch_fp32.h index d477ace457..84aa1e4007 100644 --- a/mindspore/lite/nnacl/fp32/space_to_batch_fp32.h +++ b/mindspore/lite/nnacl/fp32/space_to_batch_fp32.h @@ -15,6 +15,8 @@ */ #ifndef MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_FP32_SPACE_TO_BATCH_H_ #define MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_FP32_SPACE_TO_BATCH_H_ + +#include #include "nnacl/op_base.h" typedef struct SpaceToBatchParameter { diff --git a/mindspore/lite/nnacl/fp32/space_to_depth_fp32.c b/mindspore/lite/nnacl/fp32/space_to_depth_fp32.c index ceac8f7368..80f242ed8f 100644 --- a/mindspore/lite/nnacl/fp32/space_to_depth_fp32.c +++ b/mindspore/lite/nnacl/fp32/space_to_depth_fp32.c @@ -14,7 +14,7 @@ * limitations under the License. */ #include "nnacl/fp32/space_to_depth_fp32.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" #include "nnacl/errorcode.h" #include "nnacl/op_base.h" diff --git a/mindspore/lite/nnacl/fp32/stack_fp32.c b/mindspore/lite/nnacl/fp32/stack_fp32.c index b8ebad4b69..a35dec49c0 100644 --- a/mindspore/lite/nnacl/fp32/stack_fp32.c +++ b/mindspore/lite/nnacl/fp32/stack_fp32.c @@ -15,7 +15,7 @@ */ #include "nnacl/fp32/stack_fp32.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" size_t GetStackCopyNum(int axis, const int *in_shape, size_t shape_size) { size_t one_input_size = 1; diff --git a/mindspore/lite/nnacl/int8/add_int8.c b/mindspore/lite/nnacl/int8/add_int8.c index cd4ec4ce19..7b0c33067e 100644 --- a/mindspore/lite/nnacl/int8/add_int8.c +++ b/mindspore/lite/nnacl/int8/add_int8.c @@ -312,3 +312,16 @@ void AddOptInt8(const int8_t *ptr_in, const int8_t element_in, int8_t *output, i } return; } + +int ElementAddInt8(const int8_t *in0, const int8_t *in1, int8_t *out, int size) { + for (int i = 0; i < size; i++) { + out[i] = in0[i] + in1[i]; + } + return NNACL_OK; +} + +int BroadcastAddInt8(const int8_t *in0, const int8_t *in1, int8_t *tile_in0, int8_t *tile_in1, int8_t *out, int size, + ArithmeticParameter *param) { + TileDimensionsInt8(in0, in1, tile_in0, tile_in1, param); + return ElementAddInt8(tile_in0, tile_in1, out, size); +} diff --git a/mindspore/lite/nnacl/int8/add_int8.h b/mindspore/lite/nnacl/int8/add_int8.h index 15fdc03d7a..f5469b08b9 100644 --- a/mindspore/lite/nnacl/int8/add_int8.h +++ b/mindspore/lite/nnacl/int8/add_int8.h @@ -18,6 +18,9 @@ #define MINDSPORE_LITE_NNACL_ADD_INT8_H_ #include "nnacl/op_base.h" +#include "nnacl/errorcode.h" +#include "nnacl/arithmetic.h" +#include "nnacl/int8/arithmetic_int8.h" typedef struct AddQuantQrgs { int32_t zp_; @@ -48,6 +51,10 @@ void AddInt8(const int8_t *input0, const int8_t *input1, int8_t *output, int siz void AddOptInt8(const int8_t *ptr_in, const int8_t element_in, int8_t *output, int size, AddQuantParameter *params, AddQuantQrgs *ptr_args, AddQuantQrgs *ele_args); +int ElementAddInt8(const int8_t *in0, const int8_t *in1, int8_t *out, int size); +int BroadcastAddInt8(const int8_t *in0, const int8_t *in1, int8_t *tile_in0, int8_t *tile_in1, int8_t *out, int size, + ArithmeticParameter *param); + #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/int8/arithmetic_int8.c b/mindspore/lite/nnacl/int8/arithmetic_int8.c 
index 3685d61b84..e6e320f70f 100644 --- a/mindspore/lite/nnacl/int8/arithmetic_int8.c +++ b/mindspore/lite/nnacl/int8/arithmetic_int8.c @@ -20,6 +20,33 @@ #endif #include "nnacl/errorcode.h" +void TileOneDimensionInt8(const int8_t *inData, int8_t *outData, int dim, size_t ndim, const int *inShape, + const int *inStrides, const int *outStrides, const int *multiple) { + int srcDimSize = inShape[dim]; + if (dim == ndim - 1) { + for (int i = 0; i < multiple[dim]; i++) { + memcpy(outData, inData, srcDimSize * sizeof(int8_t)); + outData += srcDimSize; + } + return; + } + for (size_t i = 0; i < srcDimSize; i++) { + for (size_t j = 0; j < multiple[dim]; j++) { + TileOneDimensionInt8(inData + inStrides[dim] * i, outData + outStrides[dim] * (i + j * srcDimSize), dim + 1, ndim, + inShape, inStrides, outStrides, multiple); + } + } +} + +void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, + ArithmeticParameter *param) { + CalcMultiplesAndStrides(param); + TileOneDimensionInt8(data0, tile_data0, 0, param->ndim_, param->in_shape0_, param->in_strides0_, param->out_strides_, + param->multiples0_); + TileOneDimensionInt8(data1, tile_data1, 0, param->ndim_, param->in_shape1_, param->in_strides1_, param->out_strides_, + param->multiples1_); +} + #define ACCURACY_DATA 0.00000001 int ElementNotEqualInt8(int8_t *input0, int8_t *input1, uint8_t *output, int element_size, diff --git a/mindspore/lite/nnacl/int8/arithmetic_int8.h b/mindspore/lite/nnacl/int8/arithmetic_int8.h index 3c1cf6e5b6..98f3f27ac4 100644 --- a/mindspore/lite/nnacl/int8/arithmetic_int8.h +++ b/mindspore/lite/nnacl/int8/arithmetic_int8.h @@ -17,11 +17,17 @@ #define MINDSPORE_LITE_NNACL_INT8_ARITHMETIC_INT8_H_ #include "nnacl/op_base.h" +#include "nnacl/arithmetic.h" #include "nnacl/quantization/quantize.h" #ifdef __cplusplus extern "C" { #endif +void TileOneDimensionInt8(const int8_t *inData, int8_t *outData, int dim, size_t ndim, const int *inShape, + const int *inStrides, const int *outStrides, const int *multiple); +void TileDimensionsInt8(const int8_t *data0, const int8_t *data1, int8_t *tile_data0, int8_t *tile_data1, + ArithmeticParameter *param); + int ElementNotEqualInt8(int8_t *input0, int8_t *input1, uint8_t *output, int element_size, ArithmeticQuantArg *quant_arg); diff --git a/mindspore/lite/nnacl/int8/space_to_batch_int8.c b/mindspore/lite/nnacl/int8/space_to_batch_int8.c index df3aa2cfc6..5bdb2dd316 100644 --- a/mindspore/lite/nnacl/int8/space_to_batch_int8.c +++ b/mindspore/lite/nnacl/int8/space_to_batch_int8.c @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "nnacl/int8/space_to_batch_int8.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, const int *block_sizes, const int *in_shape, const int *out_shape) { diff --git a/mindspore/lite/nnacl/reverse_sequence.c b/mindspore/lite/nnacl/reverse_sequence.c index 78e4cb8757..5b0757b728 100644 --- a/mindspore/lite/nnacl/reverse_sequence.c +++ b/mindspore/lite/nnacl/reverse_sequence.c @@ -16,7 +16,7 @@ #include "nnacl/reverse_sequence.h" #include -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" void ReverseSequence(float *input0, const void *input1, float *output, ReverseSequenceParameter *para) { (void)memcpy(output, input0, para->total_data_size_); diff --git a/mindspore/lite/src/ops/arithmetic.h b/mindspore/lite/src/ops/arithmetic.h index d7f4d46165..6c5c6f807a 100644 --- a/mindspore/lite/src/ops/arithmetic.h +++ b/mindspore/lite/src/ops/arithmetic.h @@ -21,7 +21,7 @@ #include #include #include "src/ops/primitive_c.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" namespace mindspore { namespace lite { diff --git a/mindspore/lite/src/ops/populate/add_populate.cc b/mindspore/lite/src/ops/populate/add_populate.cc index 05119f7b3d..e2722ff084 100644 --- a/mindspore/lite/src/ops/populate/add_populate.cc +++ b/mindspore/lite/src/ops/populate/add_populate.cc @@ -17,7 +17,7 @@ #include "src/ops/add.h" #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" #include "src/ops/populate/arithmetic_populate.h" namespace mindspore { diff --git a/mindspore/lite/src/ops/populate/bias_add_populate.cc b/mindspore/lite/src/ops/populate/bias_add_populate.cc index 953c6fdbc3..f4875a5fd4 100644 --- a/mindspore/lite/src/ops/populate/bias_add_populate.cc +++ b/mindspore/lite/src/ops/populate/bias_add_populate.cc @@ -16,7 +16,7 @@ #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" namespace mindspore { namespace lite { diff --git a/mindspore/lite/src/ops/populate/bias_grad_populate.cc b/mindspore/lite/src/ops/populate/bias_grad_populate.cc index d19a8a2278..0bb338d3e3 100644 --- a/mindspore/lite/src/ops/populate/bias_grad_populate.cc +++ b/mindspore/lite/src/ops/populate/bias_grad_populate.cc @@ -16,7 +16,7 @@ #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" namespace mindspore { namespace lite { diff --git a/mindspore/lite/src/ops/populate/flatten_populate.cc b/mindspore/lite/src/ops/populate/flatten_populate.cc index 52d5877320..6905ad3176 100644 --- a/mindspore/lite/src/ops/populate/flatten_populate.cc +++ b/mindspore/lite/src/ops/populate/flatten_populate.cc @@ -16,18 +16,17 @@ #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/flatten.h" namespace mindspore { namespace lite { OpParameter *PopulateFlattenParameter(const mindspore::lite::PrimitiveC *primitive) { - FlattenParameter *flatten_param = reinterpret_cast(malloc(sizeof(FlattenParameter))); + OpParameter *flatten_param = reinterpret_cast(malloc(sizeof(OpParameter))); if (flatten_param == nullptr) { MS_LOG(ERROR) << "malloc FlattenParameter failed."; return nullptr; } - memset(flatten_param, 0, sizeof(FlattenParameter)); - flatten_param->op_parameter_.type_ = primitive->Type(); + 
memset(flatten_param, 0, sizeof(OpParameter)); + flatten_param->type_ = primitive->Type(); return reinterpret_cast<OpParameter *>(flatten_param); } diff --git a/mindspore/lite/src/ops/populate/mul_populate.cc b/mindspore/lite/src/ops/populate/mul_populate.cc index f33dc24ff1..1d7b709eda 100644 --- a/mindspore/lite/src/ops/populate/mul_populate.cc +++ b/mindspore/lite/src/ops/populate/mul_populate.cc @@ -15,7 +15,7 @@ */ #include "src/ops/mul.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" #include "src/ops/populate/arithmetic_populate.h" diff --git a/mindspore/lite/src/ops/populate/squeeze_populate.cc b/mindspore/lite/src/ops/populate/squeeze_populate.cc index d270fc829e..fe2c3fd32c 100644 --- a/mindspore/lite/src/ops/populate/squeeze_populate.cc +++ b/mindspore/lite/src/ops/populate/squeeze_populate.cc @@ -23,13 +23,13 @@ namespace mindspore { namespace lite { OpParameter *PopulateSqueezeParameter(const mindspore::lite::PrimitiveC *primitive) { - SqueezeParameter *squeeze_param = reinterpret_cast<SqueezeParameter *>(malloc(sizeof(SqueezeParameter))); + OpParameter *squeeze_param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); if (squeeze_param == nullptr) { MS_LOG(ERROR) << "malloc SqueezeParameter failed."; return nullptr; } - memset(squeeze_param, 0, sizeof(SqueezeParameter)); - squeeze_param->op_parameter_.type_ = primitive->Type(); + memset(squeeze_param, 0, sizeof(OpParameter)); + squeeze_param->type_ = primitive->Type(); return reinterpret_cast<OpParameter *>(squeeze_param); } Registry SqueezeParameterRegistry(schema::PrimitiveType_Squeeze, PopulateSqueezeParameter); diff --git a/mindspore/lite/src/ops/populate/sub_populate.cc b/mindspore/lite/src/ops/populate/sub_populate.cc index b3d38a3776..5685851953 100644 --- a/mindspore/lite/src/ops/populate/sub_populate.cc +++ b/mindspore/lite/src/ops/populate/sub_populate.cc @@ -17,7 +17,7 @@ #include "src/ops/sub.h" #include "src/ops/primitive_c.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" #include "src/ops/populate/arithmetic_populate.h" namespace mindspore { diff --git a/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc index 8a95daf30e..1e01125c9e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc @@ -16,7 +16,7 @@ #include "src/runtime/kernel/arm/base/depth_to_space_base.h" #include "nnacl/depth_to_space.h" #include "src/runtime/kernel/arm/fp32/depth_to_space_fp32.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc index 78aa4c5280..d40f7caac3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc @@ -39,7 +39,7 @@ ARITHMETIC_COMPARE_FUNC_INFO_FP16 arithmetic_cp_fun_table_fp16[] = { {PrimitiveType_NotEqual, schema::ActivationType_NO_ACTIVATION, ElementNotEqualFp16, ElementOptNotEqualFp16}, {PrimitiveType_Equal, schema::ActivationType_NO_ACTIVATION, ElementEqualFp16, ElementOptEqualFp16}, {PrimitiveType_Less, schema::ActivationType_NO_ACTIVATION,
ElementLessFp16, ElementOptLessFp16}, - {PrimitiveType_LessEqual, schema::ActivationType_NO_ACTIVATION, ElementLessEqual, ElementOptLessEqualFp16}, + {PrimitiveType_LessEqual, schema::ActivationType_NO_ACTIVATION, ElementLessEqualFp16, ElementOptLessEqualFp16}, {PrimitiveType_Greater, schema::ActivationType_NO_ACTIVATION, ElementGreaterFp16, ElementOptGreaterFp16}, {PrimitiveType_GreaterEqual, schema::ActivationType_NO_ACTIVATION, ElementGreaterEqualFp16, ElementOptGreaterEqualFp16}}; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax_fp32.h index 9e8a12efe7..f9dc051443 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/argminmax_fp32.h @@ -19,7 +19,7 @@ #include #include "include/errorcode.h" #include "nnacl/fp32/arg_min_max_fp32.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" #include "src/lite_kernel.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc index b7c322fb8c..8d44c2d7d6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc @@ -29,7 +29,6 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Eltwise; namespace mindspore::kernel { - ArithmeticCPUKernel::~ArithmeticCPUKernel() { FreeTmpPtr(); return; @@ -72,9 +71,10 @@ int ArithmeticCPUKernel::InitBroadCastCase() { if (input0_ptr_ == nullptr) { return RET_ERROR; } - TileOneDimension(reinterpret_cast<float *>(in_tensors_[0]->data_c()), reinterpret_cast<float *>(input0_ptr_), 0, - arithmeticParameter_->ndim_, arithmeticParameter_->in_shape0_, arithmeticParameter_->in_strides0_, - arithmeticParameter_->out_strides_, arithmeticParameter_->multiples0_); + TileOneDimensionFp32(reinterpret_cast<float *>(in_tensors_[0]->data_c()), reinterpret_cast<float *>(input0_ptr_), 0, + arithmeticParameter_->ndim_, arithmeticParameter_->in_shape0_, + arithmeticParameter_->in_strides0_, arithmeticParameter_->out_strides_, + arithmeticParameter_->multiples0_); arithmeticParameter_->broadcasting_ = false; input0_broadcast_ = true; } @@ -85,9 +85,10 @@ int ArithmeticCPUKernel::InitBroadCastCase() { FreeTmpPtr(); return RET_ERROR; } - TileOneDimension(reinterpret_cast<float *>(in_tensors_[1]->data_c()), reinterpret_cast<float *>(input1_ptr_), 0, - arithmeticParameter_->ndim_, arithmeticParameter_->in_shape1_, arithmeticParameter_->in_strides1_, - arithmeticParameter_->out_strides_, arithmeticParameter_->multiples1_); + TileOneDimensionFp32(reinterpret_cast<float *>(in_tensors_[1]->data_c()), reinterpret_cast<float *>(input1_ptr_), 0, + arithmeticParameter_->ndim_, arithmeticParameter_->in_shape1_, + arithmeticParameter_->in_strides1_, arithmeticParameter_->out_strides_, + arithmeticParameter_->multiples1_); arithmeticParameter_->broadcasting_ = false; input1_broadcast_ = true; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.h index 99a9acf7a6..c2d8c2b0d4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.h @@ -18,7 +18,6 @@ #include #include "include/errorcode.h" -#include "nnacl/arithmetic_common.h" #include "nnacl/depth_to_space.h" #include "src/runtime/kernel/arm/base/depth_to_space_base.h" diff --git
a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten_fp32.cc index 875530aa85..1f7bbbc0de 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten_fp32.cc @@ -17,7 +17,6 @@ #include "src/runtime/kernel/arm/fp32/flatten_fp32.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" -#include "nnacl/flatten.h" #include "include/errorcode.h" using mindspore::kernel::KERNEL_ARCH::kCPU; @@ -34,19 +33,12 @@ int FlattenCPUKernel::Init() { return ReSize(); } -int FlattenCPUKernel::ReSize() { - auto output_shape = out_tensors_.at(0)->shape(); - flatten_param_->size = sizeof(float); - for (size_t i = 0; i < output_shape.size(); i++) { - flatten_param_->size *= output_shape.at(i); - } - return RET_OK; -} +int FlattenCPUKernel::ReSize() { return RET_OK; } int FlattenCPUKernel::Run() { - auto input = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); - auto output = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); - Flatten(input, output, flatten_param_); + auto input = in_tensors_.at(0); + auto output = out_tensors_.at(0); + memcpy(output->data_c(), input->data_c(), output->Size()); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten_fp32.h index c476f35931..e3274771ba 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/flatten_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/flatten_fp32.h @@ -18,9 +18,7 @@ #include #include "src/lite_kernel.h" - #include "include/context.h" -#include "nnacl/flatten.h" using mindspore::lite::InnerContext; @@ -30,17 +28,12 @@ class FlattenCPUKernel : public LiteKernel { public: FlattenCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive) { - flatten_param_ = reinterpret_cast<FlattenParameter *>(parameter); - } + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} ~FlattenCPUKernel() override = default; int Init() override; int ReSize() override; int Run() override; - - private: - FlattenParameter *flatten_param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.h index 41e7c742ea..a778ef63ae 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.h @@ -19,7 +19,7 @@ #include #include "src/lite_kernel.h" #include "nnacl/fp32/space_to_batch_fp32.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" namespace mindspore::kernel { class SpaceToBatchCPUKernel : public LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc index 36119aed1f..9fd0251e12 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc @@ -15,9 +15,6 @@ */ #include "src/runtime/kernel/arm/int8/add_int8.h" -#include -#include -#include "nnacl/arithmetic_common.h" #include "nnacl/quantization/quantize.h" #include "src/runtime/runtime_api.h" #include "src/kernel_registry.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h index 8834387949..7075dfc0e0
100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/add_int8.h @@ -17,9 +17,11 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ADD_INT8_H_ #include +#include +#include #include "src/lite_kernel.h" #include "nnacl/int8/add_int8.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" #include "src/runtime/runtime_api.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h index d8831c4cff..9dbc601ff2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h @@ -19,7 +19,7 @@ #include #include "nnacl/quantization/quantize.h" #include "nnacl/int8/arg_min_max_int8.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/common_func.h" #include "include/errorcode.h" #include "src/lite_kernel.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc index a618882b7e..a86cc1c15d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc @@ -17,7 +17,7 @@ #include "src/runtime/kernel/arm/int8/arithmetic_int8.h" #include "src/runtime/kernel/arm/int8/add_int8.h" #include "src/runtime/kernel/arm/int8/mul_int8.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "src/runtime/runtime_api.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h index 3f58fc0d00..016bf13ad7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h @@ -18,8 +18,9 @@ #include #include "src/lite_kernel.h" -#include "nnacl/fp32/unique_fp32.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" +#include "nnacl/int8/add_int8.h" +#include "nnacl/int8/arithmetic_int8.h" namespace mindspore::kernel { class BiasAddInt8CPUKernel : public LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h index 15990b904f..558cfd7de8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h @@ -27,7 +27,6 @@ #include "nnacl/int8/matmul_int8.h" #include "src/runtime/kernel/arm/base/layout_transform.h" #include "src/runtime/kernel/arm/base/convolution_base.h" -#include "nnacl/arithmetic_common.h" namespace mindspore::kernel { class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc index 08ffc52306..53ef549398 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc @@ -17,7 +17,7 @@ #include "src/runtime/kernel/arm/int8/div_int8.h" #include #include -#include "nnacl/arithmetic_common.h" +#include "nnacl/int8/arithmetic_int8.h" #include "src/runtime/runtime_api.h" #include "src/kernel_registry.h" #include "include/errorcode.h" @@ -114,9 +114,9 @@ int DivInt8CPUKernel::Run() { tile1_data_ = nullptr; return RET_ERROR; } - 
TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->MutableData()), - static_cast<uint8_t *>(in_tensors_.at(1)->MutableData()), - reinterpret_cast<uint8_t *>(tile0_data_), reinterpret_cast<uint8_t *>(tile1_data_), &tile_para); + TileDimensionsInt8(static_cast<int8_t *>(in_tensors_.at(0)->MutableData()), + static_cast<int8_t *>(in_tensors_.at(1)->MutableData()), reinterpret_cast<int8_t *>(tile0_data_), + reinterpret_cast<int8_t *>(tile1_data_), &tile_para); } auto ret = ParallelLaunch(this->context_->thread_pool_, DivInt8Run, this, op_parameter_->thread_num_); if (broadcast_) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc index 709d3de4b9..d2df7b3753 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc @@ -15,10 +15,6 @@ */ #include "src/runtime/kernel/arm/int8/mul_int8.h" -#include -#include -#include "nnacl/arithmetic_common.h" -#include "nnacl/int8/mul_int8.h" #include "src/runtime/runtime_api.h" #include "src/kernel_registry.h" #include "include/errorcode.h" diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h index 1c3dc18ae7..08a3a3d23a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h @@ -17,9 +17,12 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MUL_INT8_H_ #include +#include +#include #include "src/lite_kernel.h" #include "nnacl/mul_parameter.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/int8/mul_int8.h" +#include "nnacl/int8/arithmetic_int8.h" #include "src/runtime/runtime_api.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/scale_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/scale_int8.cc index b4c4da6344..f06005471a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/scale_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/scale_int8.cc @@ -15,11 +15,6 @@ */ #include "src/runtime/kernel/arm/int8/scale_int8.h" - -#include -#include -#include "nnacl/int8/scale_int8.h" -#include "nnacl/arithmetic_common.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" @@ -66,9 +61,9 @@ int ScaleInt8CPUKernel::InitScaleOffset() { return RET_ERROR; } malloced_scale_ = true; - TileOneDimensionUint8(reinterpret_cast<uint8_t *>(in_tensors_.at(1)->data_c()), - reinterpret_cast<uint8_t *>(input1_data_), 0, tile_para->ndim_, tile_para->in_shape1_, - tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); + TileOneDimensionInt8(reinterpret_cast<int8_t *>(in_tensors_.at(1)->data_c()), + reinterpret_cast<int8_t *>(input1_data_), 0, tile_para->ndim_, tile_para->in_shape1_, + tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); } } @@ -93,9 +88,9 @@ int ScaleInt8CPUKernel::InitScaleOffset() { return RET_ERROR; } malloced_offset_ = true; - TileOneDimensionUint8(reinterpret_cast<uint8_t *>(in_tensors_.at(2)->data_c()), - reinterpret_cast<uint8_t *>(input2_data_), 0, tile_para->ndim_, tile_para->in_shape1_, - tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); + TileOneDimensionInt8(reinterpret_cast<int8_t *>(in_tensors_.at(2)->data_c()), + reinterpret_cast<int8_t *>(input2_data_), 0, tile_para->ndim_, tile_para->in_shape1_, + tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); } } } @@ -305,9 +300,9 @@ int ScaleInt8CPUKernel::Run() { MS_LOG(ERROR) << "malloc input1_data_ failed."; return RET_ERROR; } -
TileOneDimensionUint8(reinterpret_cast<uint8_t *>(in_tensors_.at(1)->data_c()), - reinterpret_cast<uint8_t *>(input1_data_), 0, tile_para->ndim_, tile_para->in_shape1_, - tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); + TileOneDimensionInt8(reinterpret_cast<int8_t *>(in_tensors_.at(1)->data_c()), + reinterpret_cast<int8_t *>(input1_data_), 0, tile_para->ndim_, tile_para->in_shape1_, + tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); } // If has bias, bias is passed by previous node case, need do broadcasting online @@ -319,9 +314,9 @@ int ScaleInt8CPUKernel::Run() { input1_data_ = nullptr; return RET_ERROR; } - TileOneDimensionUint8(reinterpret_cast<uint8_t *>(in_tensors_.at(2)->data_c()), - reinterpret_cast<uint8_t *>(input2_data_), 0, tile_para->ndim_, tile_para->in_shape1_, - tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); + TileOneDimensionInt8(reinterpret_cast<int8_t *>(in_tensors_.at(2)->data_c()), + reinterpret_cast<int8_t *>(input2_data_), 0, tile_para->ndim_, tile_para->in_shape1_, + tile_para->in_strides1_, tile_para->out_strides_, tile_para->multiples1_); } auto ret = ParallelLaunch(this->context_->thread_pool_, ScaleRunInt8, this, op_parameter_->thread_num_); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/scale_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/scale_int8.h index e66d9055ef..c7e07207c5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/scale_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/scale_int8.h @@ -17,14 +17,15 @@ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SCALE_INT8_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SCALE_INT8_H_ +#include #include #include "src/lite_kernel.h" #include "nnacl/scale.h" #include "nnacl/quantization/quantize.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/int8/arithmetic_int8.h" +#include "nnacl/int8/scale_int8.h" namespace mindspore::kernel { - class ScaleInt8CPUKernel : public LiteKernel { public: ScaleInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc index 2c3e3ead17..3d2765fc2d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc @@ -15,10 +15,6 @@ */ #include "src/runtime/kernel/arm/int8/sub_int8.h" -#include -#include -#include "nnacl/arithmetic_common.h" -#include "nnacl/quantization/quantize.h" #include "src/runtime/runtime_api.h" #include "src/kernel_registry.h" #include "include/errorcode.h" @@ -29,7 +25,6 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Sub; namespace mindspore::kernel { - int SubInt8CPUKernel::Init() { lite::Tensor *input0 = in_tensors_.at(0); lite::Tensor *input1 = in_tensors_.at(1); @@ -142,9 +137,9 @@ int SubInt8CPUKernel::Run() { context_->allocator->Free(tile0_data_); return RET_ERROR; } - TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->MutableData()), - static_cast<uint8_t *>(in_tensors_.at(1)->MutableData()), - reinterpret_cast<uint8_t *>(tile0_data_), reinterpret_cast<uint8_t *>(tile1_data_), &tile_para); + TileDimensionsInt8(static_cast<int8_t *>(in_tensors_.at(0)->data_c()), + static_cast<int8_t *>(in_tensors_.at(1)->data_c()), reinterpret_cast<int8_t *>(tile0_data_), + reinterpret_cast<int8_t *>(tile1_data_), &tile_para); } auto ret = ParallelLaunch(this->context_->thread_pool_, SubInt8Run, this, op_parameter_->thread_num_); if (broadcast_) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h index
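// What the TileOneDimension*/TileDimensions* calls above are doing, as a sketch
// (this mirrors the pattern of the nnacl tiling helpers, not their exact code):
// before a broadcast int8 op runs, each operand is expanded to the full output
// shape by repeating it along every axis whose multiple (out_dim / in_dim)
// exceeds 1, so the elementwise kernel can then work on two same-shaped buffers.
#include <cstddef>
#include <cstdint>
#include <cstring>
void TileOneDimensionSketch(const int8_t *in, int8_t *out, int dim, size_t ndim,
                            const int *in_shape, const int *in_strides,
                            const int *out_strides, const int *multiple) {
  if (dim == static_cast<int>(ndim) - 1) {
    // Innermost axis: emit the source row once per repetition.
    for (int m = 0; m < multiple[dim]; ++m) {
      memcpy(out + m * in_shape[dim], in, in_shape[dim] * sizeof(int8_t));
    }
    return;
  }
  for (int i = 0; i < in_shape[dim]; ++i) {
    for (int m = 0; m < multiple[dim]; ++m) {
      // Recurse per slice; the output index interleaves the i-th input slice
      // with its m-th repetition along this axis.
      TileOneDimensionSketch(in + in_strides[dim] * i,
                             out + out_strides[dim] * (i + m * in_shape[dim]),
                             dim + 1, ndim, in_shape, in_strides, out_strides, multiple);
    }
  }
}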
1a1e632dc0..728ced359a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h @@ -17,6 +17,10 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SUB_INT8_H_ #include +#include +#include +#include "nnacl/int8/arithmetic_int8.h" +#include "nnacl/quantization/quantize.h" #include "src/lite_kernel.h" #include "nnacl/int8/sub_int8.h" #include "src/runtime/runtime_api.h" diff --git a/mindspore/lite/src/runtime/kernel/npu/eltwise_npu.h b/mindspore/lite/src/runtime/kernel/npu/eltwise_npu.h index 90df10b7bf..ce0a55141b 100644 --- a/mindspore/lite/src/runtime/kernel/npu/eltwise_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/eltwise_npu.h @@ -18,7 +18,6 @@ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_ELTWISE_NPU_H_ #include #include "src/ops/eltwise.h" -#include "nnacl/arithmetic_common.h" #include "src/runtime/kernel/npu/npu_kernel.h" #include "include/graph/op/all_ops.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h index 80bade7352..f59e27c498 100644 --- a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h @@ -19,7 +19,6 @@ #include #include "nnacl/resize_parameter.h" #include "src/ops/resize.h" -#include "nnacl/arithmetic_common.h" #include "src/runtime/kernel/npu/npu_kernel.h" #include "include/graph/op/all_ops.h" namespace mindspore::kernel { diff --git a/mindspore/lite/src/train/train_populate_parameter.cc b/mindspore/lite/src/train/train_populate_parameter.cc index fef48e67be..892a47dffd 100644 --- a/mindspore/lite/src/train/train_populate_parameter.cc +++ b/mindspore/lite/src/train/train_populate_parameter.cc @@ -30,7 +30,7 @@ #include "src/ops/power_grad.h" #include "nnacl/power_parameter.h" #include "src/ops/bias_grad.h" -#include "nnacl/arithmetic_common.h" +#include "nnacl/arithmetic.h" #include "nnacl/fp32_grad/optimizer.h" #include "src/ops/apply_momentum.h" #include "src/ops/sgd.h" diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc deleted file mode 100644 index 53da47b5f6..0000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc +++ /dev/null @@ -1,1350 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "src/common/file_utils.h" -#include "mindspore/lite/nnacl/fp32/arithmetic_fp32.h" -#include "mindspore/lite/src/kernel_registry.h" -#include "mindspore/lite/src/lite_kernel.h" -#include "include/errorcode.h" - -namespace mindspore { - -class TestArithmeticTestFp32 : public mindspore::CommonTest { - public: - TestArithmeticTestFp32() {} - void PrepareInt(const std::vector<int> &input0_shape, const std::vector<int> &input1_shape, bool broadcast, - const std::vector<int> &output_shape, int *input0_data, int *input1_data, int *output_data, int type, - int act_type, const int thread_num); - void TearDown() override; - - public: - float err_tol = 1e-5; - lite::Tensor in_tensor_0_; - lite::Tensor in_tensor_1_; - lite::Tensor out_tensor_; - std::vector<lite::Tensor *> inputs_{&in_tensor_0_, &in_tensor_1_}; - std::vector<lite::Tensor *> outputs_{&out_tensor_}; - ArithmeticParameter param_; - kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt, schema::PrimitiveType_Eltwise}; - lite::InnerContext ctx_ = lite::InnerContext(); - kernel::KernelCreator creator_ = nullptr; - kernel::LiteKernel *kernel_ = nullptr; -}; - -void TestArithmeticTestFp32::PrepareInt(const std::vector<int> &input0_shape, const std::vector<int> &input1_shape, - bool broadcast, const std::vector<int> &output_shape, int *input0_data, - int *input1_data, int *output_data, int type, int act_type, - const int thread_num) { - param_.broadcasting_ = true; - param_.op_parameter_.type_ = type; - param_.ndim_ = input0_shape.size(); - param_.activation_type_ = act_type; - param_.broadcasting_ = broadcast; - for (size_t i = 0; i < input0_shape.size(); ++i) { - param_.in_shape0_[i] = input0_shape[i]; - } - for (size_t i = 0; i < input1_shape.size(); ++i) { - param_.in_shape1_[i] = input1_shape[i]; - } - for (size_t i = 0; i < output_shape.size(); ++i) { - param_.out_shape_[i] = output_shape[i]; - } - - in_tensor_0_.set_data_type(kNumberTypeInt); - in_tensor_0_.set_data(input0_data); - in_tensor_0_.set_shape(input0_shape); - in_tensor_1_.set_data(input1_data); - in_tensor_1_.set_shape(input1_shape); - out_tensor_.set_data(output_data); - out_tensor_.set_shape(output_shape); - - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc_); - ASSERT_NE(creator, nullptr); - ctx_.thread_num_ = thread_num; - ASSERT_EQ(lite::RET_OK, ctx_.Init()); - kernel_ = creator(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc_, nullptr); - ASSERT_NE(kernel_, nullptr); -} - -void TestArithmeticTestFp32::TearDown() { - in_tensor_0_.set_data(nullptr); - in_tensor_1_.set_data(nullptr); - out_tensor_.set_data(nullptr); -} - -TEST_F(TestArithmeticTestFp32, AddTest) { - auto add_param = new ArithmeticParameter(); - add_param->ndim_ = 4; - add_param->in_shape0_[0] = 1; - add_param->in_shape0_[1] = 2; - add_param->in_shape0_[2] = 3; - add_param->in_shape0_[3] = 4; - add_param->in_shape1_[0] = 1; - add_param->in_shape1_[1] = 1; - add_param->in_shape1_[2] = 1; - add_param->in_shape1_[3] = 4; - add_param->out_shape_[0] = 1; - add_param->out_shape_[1] = 2; - add_param->out_shape_[2] = 3; - add_param->out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector<float> in = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, 14.727955, -8.080715, - 13.71383, 8.055829, 6.5845337, -9.25232, -4.24519, 11.550042, 9.262012, 1.2780352, - 6.7263746, -3.9301445, 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - auto in_ptr = in.data(); - std::vector<float> add = {0.9035316, 0.022212252, 0.3038014,
0.3478275}; - auto add_ptr = add.data(); - std::vector correct_out = {13.119816, 3.368904, 15.631221, 5.5827856, 1.7079077, 9.9744, - 15.031756, -7.7328877, 14.617362, 8.078041, 6.888335, -8.904492, - -3.3416586, 11.572254, 9.565813, 1.6258626, 7.629906, -3.9079323, - 4.0682936, -8.254251, -2.4522753, 13.641247, -2.365638, 3.548678}; - auto correct_out_ptr = correct_out.data(); - - int size = 1 * 2 * 3 * 4; - auto out = new float[size]; - - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - BroadcastAdd(in_ptr, add_ptr, tile_data0, tile_data1, out, size, add_param); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete add_param; -} - -TEST_F(TestArithmeticTestFp32, MulTest) { - auto mul_param = new ArithmeticParameter(); - mul_param->ndim_ = 4; - mul_param->in_shape0_[0] = 1; - mul_param->in_shape0_[1] = 2; - mul_param->in_shape0_[2] = 3; - mul_param->in_shape0_[3] = 4; - mul_param->in_shape1_[0] = 1; - mul_param->in_shape1_[1] = 1; - mul_param->in_shape1_[2] = 1; - mul_param->in_shape1_[3] = 4; - mul_param->out_shape_[0] = 1; - mul_param->out_shape_[1] = 2; - mul_param->out_shape_[2] = 3; - mul_param->out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector in = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, 14.727955, -8.080715, - 13.71383, 8.055829, 6.5845337, -9.25232, -4.24519, 11.550042, 9.262012, 1.2780352, - 6.7263746, -3.9301445, 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - auto in_ptr = in.data(); - std::vector add = {0.16771512, 0.7336843, 0.6768286, 0.4453379}; - auto add_ptr = add.data(); - std::vector correct_out = {2.0488555, 2.4554152, 10.374036, 2.3313253, 0.13490601, 7.3017635, - 9.968302, -3.5986485, 2.3000166, 5.910435, 4.4566007, -4.120409, - -0.71198255, 8.474085, 6.2687945, 0.5691575, 1.1281147, -2.8834853, - 2.547916, -3.8308315, -0.56281954, 9.992072, -1.8067529, 1.42546}; - auto correct_out_ptr = correct_out.data(); - - int size = 1 * 2 * 3 * 4; - auto out = new float[size]; - - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - BroadcastMul(in_ptr, add_ptr, tile_data0, tile_data1, out, size, mul_param); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete mul_param; -} - -TEST_F(TestArithmeticTestFp32, DivTest) { - auto div_param = new ArithmeticParameter(); - div_param->ndim_ = 4; - div_param->in_shape0_[0] = 1; - div_param->in_shape0_[1] = 2; - div_param->in_shape0_[2] = 3; - div_param->in_shape0_[3] = 4; - div_param->in_shape1_[0] = 1; - div_param->in_shape1_[1] = 1; - div_param->in_shape1_[2] = 1; - div_param->in_shape1_[3] = 4; - div_param->out_shape_[0] = 1; - div_param->out_shape_[1] = 2; - div_param->out_shape_[2] = 3; - div_param->out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector in = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, 14.727955, -8.080715, - 13.71383, 8.055829, 6.5845337, -9.25232, -4.24519, 11.550042, 9.262012, 1.2780352, - 6.7263746, -3.9301445, 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - auto in_ptr = in.data(); - std::vector add = {1.6771512, -7.336843, 0.6768286, 4.453379}; - auto add_ptr = add.data(); - std::vector correct_out = {7.28394912, -0.45614875, 22.64593872, 1.17550247, 0.47960852, -1.35646735, - 21.76024329, -1.8145132, 8.17685967, -1.09799665, 9.72850985, -2.07759546, - -2.53119099, -1.5742523, 13.68442764, 
0.28698101, 4.01059523, 0.53567243, - 5.56195764, -1.93158453, -2.000897, -1.85625275, -3.94404034, 0.71874648}; - auto correct_out_ptr = correct_out.data(); - - int size = 1 * 2 * 3 * 4; - auto out = new float[size]; - - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - BroadcastDiv(in_ptr, add_ptr, tile_data0, tile_data1, out, size, div_param); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete div_param; -} - -TEST_F(TestArithmeticTestFp32, DivTest2) { - std::vector<float> in0 = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100}; - std::vector<float> in1 = {5, 10, 2, 8, 2, 3, 7, 80, 45, 20}; - std::vector<float> correct_out = {2, 2, 15, 5, 25, 20, 10, 1, 2, 5}; - constexpr int kOutSize = 10; - float out[kOutSize]; - ElementDiv(in0.data(), in1.data(), out, kOutSize); - std::cout << "out: "; - for (int i = 0; i < kOutSize; ++i) { - std::cout << out[i] << " "; - } - std::cout << "\n"; - ASSERT_EQ(0, CompareOutputData(out, correct_out.data(), kOutSize, 0.00001)); -} - -TEST_F(TestArithmeticTestFp32, FloorDivTest) { - auto fdiv_param = new ArithmeticParameter(); - fdiv_param->ndim_ = 4; - fdiv_param->in_shape0_[0] = 1; - fdiv_param->in_shape0_[1] = 1; - fdiv_param->in_shape0_[2] = 3; - fdiv_param->in_shape0_[3] = 4; - fdiv_param->in_shape1_[0] = 1; - fdiv_param->in_shape1_[1] = 1; - fdiv_param->in_shape1_[2] = 1; - fdiv_param->in_shape1_[3] = 4; - fdiv_param->out_shape_[0] = 1; - fdiv_param->out_shape_[1] = 1; - fdiv_param->out_shape_[2] = 3; - fdiv_param->out_shape_[3] = 4; - - /* 1x1x3x4 NHWC */ - std::vector<float> in = {1.1, -1.1, 3.123, -5.432, 0.1234, -0.0312, 12.1, 21.1, 9.1, 9.0, -100, 0.1}; - auto in_ptr = in.data(); - std::vector<float> add = {1, 3, 2, 0.3}; - auto add_ptr = add.data(); - std::vector<float> correct_out = {1, -1, 1, -19, 0, -1, 6, 70, 9, 3, -50, 0}; - auto correct_out_ptr = correct_out.data(); - - int size = 1 * 1 * 3 * 4; - auto out = new float[size]; - - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - int ret = BroadcastFloorDiv(in_ptr, add_ptr, tile_data0, tile_data1, out, size, fdiv_param); - EXPECT_EQ(ret, 0); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete fdiv_param; -} - -TEST_F(TestArithmeticTestFp32, FloorModTest) { - auto fmod_param = new ArithmeticParameter(); - fmod_param->ndim_ = 4; - fmod_param->in_shape0_[0] = 1; - fmod_param->in_shape0_[1] = 1; - fmod_param->in_shape0_[2] = 3; - fmod_param->in_shape0_[3] = 4; - fmod_param->in_shape1_[0] = 1; - fmod_param->in_shape1_[1] = 1; - fmod_param->in_shape1_[2] = 1; - fmod_param->in_shape1_[3] = 4; - fmod_param->out_shape_[0] = 1; - fmod_param->out_shape_[1] = 1; - fmod_param->out_shape_[2] = 3; - fmod_param->out_shape_[3] = 4; - - /* 1x1x3x4 NHWC */ - std::vector<float> in = {1.1, -1.1, 3.123, -5.432, 0.1234, -0.0312, 12.1, 21.1, 9.1, 9.0, -100, 0.1}; - auto in_ptr = in.data(); - std::vector<float> add = {1, 3, 2, 0.3}; - auto add_ptr = add.data(); - std::vector<float> correct_out = {0.100000, 1.900000, 1.123000, 0.268000, 0.123400, 2.968800, - 0.100000, 0.100000, 0.100000, 0.000000, 0.000000, 0.100000}; - auto correct_out_ptr = correct_out.data(); - - int size = 1 * 1 * 3 * 4; - auto out = new float[size]; - - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - int ret = BroadcastFloorMod(in_ptr, add_ptr, tile_data0, tile_data1, out, size, fmod_param); - EXPECT_EQ(ret, 0); - ASSERT_EQ(0, CompareOutputData(out,
correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete fmod_param; -} - -TEST_F(TestArithmeticTestFp32, LogicalAndTest) { - auto logical_and_param = new ArithmeticParameter(); - logical_and_param->ndim_ = 4; - logical_and_param->in_shape0_[0] = 1; - logical_and_param->in_shape0_[1] = 2; - logical_and_param->in_shape0_[2] = 3; - logical_and_param->in_shape0_[3] = 4; - logical_and_param->in_shape1_[0] = 1; - logical_and_param->in_shape1_[1] = 1; - logical_and_param->in_shape1_[2] = 1; - logical_and_param->in_shape1_[3] = 4; - logical_and_param->out_shape_[0] = 1; - logical_and_param->out_shape_[1] = 2; - logical_and_param->out_shape_[2] = 3; - logical_and_param->out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector in = {12.216284, 3.3466918, 15.327419, 5.234958, 0, 9.952188, 14.727955, -8.080715, - 13.71383, 8.055829, 6.5845337, -9.25232, -4.24519, 11.550042, 9.262012, 1.2780352, - 6.7263746, -3.9301445, 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - auto in_ptr = in.data(); - std::vector add = {1.6771512, -7.336843, 0, 4.453379}; - auto add_ptr = add.data(); - std::vector correct_out = {1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1}; - auto correct_out_ptr = correct_out.data(); - int size = 1 * 2 * 3 * 4; - - auto out = new float[size]; - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - BroadcastLogicalAnd(in_ptr, add_ptr, tile_data0, tile_data1, out, size, logical_and_param); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete logical_and_param; -} - -TEST_F(TestArithmeticTestFp32, LogicalOrTest) { - auto logical_or_param = new ArithmeticParameter(); - logical_or_param->ndim_ = 4; - logical_or_param->in_shape0_[0] = 1; - logical_or_param->in_shape0_[1] = 2; - logical_or_param->in_shape0_[2] = 3; - logical_or_param->in_shape0_[3] = 4; - logical_or_param->in_shape1_[0] = 1; - logical_or_param->in_shape1_[1] = 1; - logical_or_param->in_shape1_[2] = 1; - logical_or_param->in_shape1_[3] = 4; - logical_or_param->out_shape_[0] = 1; - logical_or_param->out_shape_[1] = 2; - logical_or_param->out_shape_[2] = 3; - logical_or_param->out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector in = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 0, 14.727955, -8.080715, - 13.71383, 8.055829, 6.5845337, -9.25232, -4.24519, 11.550042, 9.262012, 1.2780352, - 6.7263746, -3.9301445, 3.764492, 0, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - - auto in_ptr = in.data(); - std::vector add = {1.6771512, 0, 0.6768286, 0}; - auto add_ptr = add.data(); - std::vector correct_out = {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}; - auto correct_out_ptr = correct_out.data(); - - int size = 1 * 2 * 3 * 4; - - auto out = new float[size]; - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - BroadcastLogicalOr(in_ptr, add_ptr, tile_data0, tile_data1, out, size, logical_or_param); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete logical_or_param; -} - -TEST_F(TestArithmeticTestFp32, MaximumTest) { - auto maximum_param = new ArithmeticParameter(); - maximum_param->ndim_ = 4; - maximum_param->in_shape0_[0] = 1; - maximum_param->in_shape0_[1] = 2; - maximum_param->in_shape0_[2] = 3; - maximum_param->in_shape0_[3] = 4; - maximum_param->in_shape1_[0] = 1; - 
maximum_param->in_shape1_[1] = 1; - maximum_param->in_shape1_[2] = 1; - maximum_param->in_shape1_[3] = 4; - maximum_param->out_shape_[0] = 1; - maximum_param->out_shape_[1] = 2; - maximum_param->out_shape_[2] = 3; - maximum_param->out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector in = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 0, 14.727955, -8.080715, - 13.71383, 8.055829, 6.5845337, -9.25232, -4.24519, 11.550042, 9.262012, 1.2780352, - 6.7263746, -3.9301445, 3.764492, 0, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - - auto in_ptr = in.data(); - std::vector add = {1.6771512, 6.34876, 3.6768286, 2.936284}; - auto add_ptr = add.data(); - std::vector correct_out = {12.216284, 6.34876, 15.327419, 5.234958, 1.6771512, 6.34876, - 14.727955, 2.936284, 13.71383, 8.055829, 6.5845337, 2.936284, - 1.6771512, 11.550042, 9.262012, 2.936284, 6.7263746, 6.34876, - 3.764492, 2.93628, 1.6771512, 13.619035, 3.6768286, 3.2008505}; - auto correct_out_ptr = correct_out.data(); - - int size = 1 * 2 * 3 * 4; - - auto out = new float[size]; - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - BroadcastMaximum(in_ptr, add_ptr, tile_data0, tile_data1, out, size, maximum_param); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete maximum_param; -} - -TEST_F(TestArithmeticTestFp32, MinimumTest) { - auto minimum_param = new ArithmeticParameter(); - minimum_param->ndim_ = 4; - minimum_param->in_shape0_[0] = 1; - minimum_param->in_shape0_[1] = 2; - minimum_param->in_shape0_[2] = 3; - minimum_param->in_shape0_[3] = 4; - minimum_param->in_shape1_[0] = 1; - minimum_param->in_shape1_[1] = 1; - minimum_param->in_shape1_[2] = 1; - minimum_param->in_shape1_[3] = 4; - minimum_param->out_shape_[0] = 1; - minimum_param->out_shape_[1] = 2; - minimum_param->out_shape_[2] = 3; - minimum_param->out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector in = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 0, 14.727955, -8.080715, - 13.71383, 8.055829, 6.5845337, -9.25232, -4.24519, 11.550042, 9.262012, 1.2780352, - 6.7263746, -3.9301445, 3.764492, 0, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - - auto in_ptr = in.data(); - std::vector add = {1.6771512, 6.34876, 3.6768286, 2.936284}; - auto add_ptr = add.data(); - std::vector correct_out = {1.6771512, 3.3466918, 3.6768286, 2.936284, 0.804376, 0, - 3.6768286, -8.080715, 1.6771512, 6.34876, 3.6768286, -9.25232, - -4.24519, 6.34876, 3.6768286, 1.2780352, 1.6771512, -3.9301445, - 3.6768286, 0, -3.3558068, 6.34876, -2.6694393, 2.936284}; - auto correct_out_ptr = correct_out.data(); - - int size = 1 * 2 * 3 * 4; - - auto out = new float[size]; - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - BroadcastMinimum(in_ptr, add_ptr, tile_data0, tile_data1, out, size, minimum_param); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete minimum_param; -} - -TEST_F(TestArithmeticTestFp32, SquaredDifferenceTest) { - auto add_param = new ArithmeticParameter(); - add_param->ndim_ = 3; - add_param->in_shape0_[0] = 2; - add_param->in_shape0_[1] = 3; - add_param->in_shape0_[2] = 2; - add_param->in_shape1_[0] = 2; - add_param->in_shape1_[1] = 1; - add_param->in_shape1_[2] = 2; - add_param->out_shape_[0] = 2; - add_param->out_shape_[1] = 3; - add_param->out_shape_[2] = 2; - - /* 1x2x3x4 NHWC */ - std::vector in = {10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 
24, 25}; - auto in_ptr = in.data(); - std::vector<float> add = {30, 31, 32, 33}; - auto add_ptr = add.data(); - std::vector<float> correct_out = {400, 400, 324, 324, 256, 256, 144, 144, 100, 100, 64, 64}; - auto correct_out_ptr = correct_out.data(); - - int size = 2 * 3 * 2; - auto out = new float[size]; - - auto tile_data0 = new float[size]; - auto tile_data1 = new float[size]; - BroadcastSub(in_ptr, add_ptr, tile_data0, tile_data1, out, size, add_param); - ElementMul(out, out, out, size); - ASSERT_EQ(0, CompareOutputData(out, correct_out_ptr, size, 0.00001)); - - delete[] out; - delete[] tile_data0; - delete[] tile_data1; - delete add_param; -} - -TEST_F(TestArithmeticTestFp32, MulFp32) { - std::vector<lite::Tensor *> inputs_tensor; - std::vector<lite::Tensor *> outputs_tensor; - - ArithmeticParameter mul_param; - mul_param.broadcasting_ = true; - mul_param.op_parameter_.type_ = schema::PrimitiveType_Mul; - mul_param.ndim_ = 4; - mul_param.in_shape0_[0] = 1; - mul_param.in_shape0_[1] = 2; - mul_param.in_shape0_[2] = 3; - mul_param.in_shape0_[3] = 4; - mul_param.in_shape1_[0] = 1; - mul_param.in_shape1_[1] = 1; - mul_param.in_shape1_[2] = 1; - mul_param.in_shape1_[3] = 4; - mul_param.out_shape_[0] = 1; - mul_param.out_shape_[1] = 2; - mul_param.out_shape_[2] = 3; - mul_param.out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector<float> input0 = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, - 14.727955, -8.080715, 13.71383, 8.055829, 6.5845337, -9.25232, - -4.24519, 11.550042, 9.262012, 1.2780352, 6.7263746, -3.9301445, - 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - std::vector<int> input0_shape = {1, 2, 3, 4}; - std::vector<float> input1 = {0.16771512, 0.7336843, 0.6768286, 0.4453379}; - std::vector<int> input1_shape = {1, 1, 1, 4}; - - lite::Tensor input0_tensor; - lite::Tensor input1_tensor; - input0_tensor.set_data_type(kNumberTypeFloat32); - input0_tensor.set_data(input0.data()); - input1_tensor.set_data(input1.data()); - input0_tensor.set_shape(input0_shape); - input1_tensor.set_shape(input1_shape); - inputs_tensor.push_back(&input0_tensor); - inputs_tensor.push_back(&input1_tensor); - - std::vector<float> output(24); - std::vector<int> output_shape = {1, 2, 3, 4}; - - lite::Tensor output0_tensor; - outputs_tensor.push_back(&output0_tensor); - output0_tensor.set_data(output.data()); - output0_tensor.set_shape(output_shape); - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - ASSERT_NE(creator, nullptr); - lite::InnerContext ctx; - ctx.thread_num_ = 3; - ASSERT_EQ(lite::RET_OK, ctx.Init()); - kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&mul_param), &ctx, desc, nullptr); - ASSERT_NE(kernel, nullptr); - auto output_tensor_shape = output0_tensor.shape(); - kernel->Run(); - - std::vector<float> correct_out = {2.0488555, 2.4554152, 10.374036, 2.3313253, 0.13490601, 7.3017635, - 9.968302, -3.5986485, 2.3000166, 5.910435, 4.4566007, -4.120409, - -0.71198255, 8.474085, 6.2687945, 0.5691575, 1.1281147, -2.8834853, - 2.547916, -3.8308315, -0.56281954, 9.992072, -1.8067529, 1.42546}; - auto correct_out_ptr = correct_out.data(); - - ASSERT_EQ(0, CompareOutputData(output.data(), correct_out_ptr, 24, 0.00001)); - - input0_tensor.set_data(nullptr); - input1_tensor.set_data(nullptr); - output0_tensor.set_data(nullptr); -} - -TEST_F(TestArithmeticTestFp32, MulReluFp32) { - std::vector<lite::Tensor *> inputs_tensor; - std::vector<lite::Tensor *> outputs_tensor; - - ArithmeticParameter mul_param; -
mul_param.broadcasting_ = true; - mul_param.op_parameter_.type_ = schema::PrimitiveType_Mul; - mul_param.ndim_ = 4; - mul_param.activation_type_ = schema::ActivationType_RELU; - mul_param.in_shape0_[0] = 1; - mul_param.in_shape0_[1] = 2; - mul_param.in_shape0_[2] = 3; - mul_param.in_shape0_[3] = 4; - mul_param.in_shape1_[0] = 1; - mul_param.in_shape1_[1] = 1; - mul_param.in_shape1_[2] = 1; - mul_param.in_shape1_[3] = 4; - mul_param.out_shape_[0] = 1; - mul_param.out_shape_[1] = 2; - mul_param.out_shape_[2] = 3; - mul_param.out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector input0 = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, - 14.727955, -8.080715, 13.71383, 8.055829, 6.5845337, -9.25232, - -4.24519, 11.550042, 9.262012, 1.2780352, 6.7263746, -3.9301445, - 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - std::vector input0_shape = {1, 2, 3, 4}; - std::vector input1 = {0.16771512, 0.7336843, 0.6768286, 0.4453379}; - std::vector input1_shape = {1, 1, 1, 4}; - - lite::Tensor input0_tensor; - lite::Tensor input1_tensor; - input0_tensor.set_data_type(kNumberTypeFloat32); - input0_tensor.set_data(input0.data()); - input1_tensor.set_data(input1.data()); - input0_tensor.set_shape(input0_shape); - input1_tensor.set_shape(input1_shape); - inputs_tensor.push_back(&input0_tensor); - inputs_tensor.push_back(&input1_tensor); - - std::vector output(24); - std::vector output_shape = {1, 2, 3, 4}; - - lite::Tensor output0_tensor; - outputs_tensor.push_back(&output0_tensor); - output0_tensor.set_data(output.data()); - output0_tensor.set_shape(output_shape); - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - ASSERT_NE(creator, nullptr); - lite::InnerContext ctx; - ctx.thread_num_ = 3; - ASSERT_EQ(lite::RET_OK, ctx.Init()); - kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast(&mul_param), &ctx, desc, nullptr); - ASSERT_NE(kernel, nullptr); - auto output_tensor_shape = output0_tensor.shape(); - kernel->Run(); - - std::vector correct_out = {2.0488555, 2.4554152, 10.374036, 2.3313253, 0.13490601, 7.3017635, - 9.968302, 0, 2.3000166, 5.910435, 4.4566007, 0, - 0, 8.474085, 6.2687945, 0.5691575, 1.1281147, 0, - 2.547916, 0, 0, 9.992072, 0, 1.42546}; - auto correct_out_ptr = correct_out.data(); - - ASSERT_EQ(0, CompareOutputData(output.data(), correct_out_ptr, 24, 0.00001)); - - input0_tensor.set_data(nullptr); - input1_tensor.set_data(nullptr); - output0_tensor.set_data(nullptr); -} - -TEST_F(TestArithmeticTestFp32, MulRelu6Fp32) { - std::vector inputs_tensor; - std::vector outputs_tensor; - - ArithmeticParameter mul_param; - mul_param.broadcasting_ = true; - mul_param.op_parameter_.type_ = schema::PrimitiveType_Mul; - mul_param.ndim_ = 4; - mul_param.activation_type_ = schema::ActivationType_RELU6; - mul_param.in_shape0_[0] = 1; - mul_param.in_shape0_[1] = 2; - mul_param.in_shape0_[2] = 3; - mul_param.in_shape0_[3] = 4; - mul_param.in_shape1_[0] = 1; - mul_param.in_shape1_[1] = 1; - mul_param.in_shape1_[2] = 1; - mul_param.in_shape1_[3] = 4; - mul_param.out_shape_[0] = 1; - mul_param.out_shape_[1] = 2; - mul_param.out_shape_[2] = 3; - mul_param.out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector input0 = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, - 14.727955, -8.080715, 13.71383, 8.055829, 6.5845337, -9.25232, - -4.24519, 11.550042, 9.262012, 1.2780352, 6.7263746, -3.9301445, - 
3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - std::vector input0_shape = {1, 2, 3, 4}; - std::vector input1 = {0.16771512, 0.7336843, 0.6768286, 0.4453379}; - std::vector input1_shape = {1, 1, 1, 4}; - - lite::Tensor input0_tensor; - lite::Tensor input1_tensor; - input0_tensor.set_data_type(kNumberTypeFloat32); - input0_tensor.set_data(input0.data()); - input1_tensor.set_data(input1.data()); - input0_tensor.set_shape(input0_shape); - input1_tensor.set_shape(input1_shape); - inputs_tensor.push_back(&input0_tensor); - inputs_tensor.push_back(&input1_tensor); - - std::vector output(24); - std::vector output_shape = {1, 2, 3, 4}; - - lite::Tensor output0_tensor; - outputs_tensor.push_back(&output0_tensor); - output0_tensor.set_data(output.data()); - output0_tensor.set_shape(output_shape); - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - ASSERT_NE(creator, nullptr); - lite::InnerContext ctx; - ctx.thread_num_ = 3; - ASSERT_EQ(lite::RET_OK, ctx.Init()); - kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast(&mul_param), &ctx, desc, nullptr); - ASSERT_NE(kernel, nullptr); - auto output_tensor_shape = output0_tensor.shape(); - kernel->Run(); - - std::vector correct_out = {2.0488555, 2.4554152, 6, 2.3313253, 0.13490601, 6, 6, 0, - 2.3000166, 5.910435, 4.4566007, 0, 0, 6, 6, 0.5691575, - 1.1281147, 0, 2.547916, 0, 0, 6, 0, 1.42546}; - auto correct_out_ptr = correct_out.data(); - - ASSERT_EQ(0, CompareOutputData(output.data(), correct_out_ptr, 24, 0.00001)); - - input0_tensor.set_data(nullptr); - input1_tensor.set_data(nullptr); - output0_tensor.set_data(nullptr); -} - -TEST_F(TestArithmeticTestFp32, MulInt0) { - std::vector input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1, 1, 1, 3}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - int in1_data[3] = {3, 2, 1}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_NO_ACTIVATION; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 2, 2, 9, 8, 5, 18, 14, 8, 27, 20, 11}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulInt1) { - std::vector input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - int in1_data[1] = {2}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_NO_ACTIVATION; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulInt2) { - std::vector input0_shape{1}; - std::vector input1_shape{1, 2, 2, 3}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[1] = {2}; - int in1_data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - int 
out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_NO_ACTIVATION; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulInt3) { - std::vector input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1, 2, 2, 3}; - bool broadcast = false; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}; - int in1_data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_NO_ACTIVATION; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulReluInt0) { - std::vector input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1, 1, 1, 3}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - int in1_data[3] = {-1, 1, 1}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_RELU; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 1, 2, 0, 4, 5, 0, 7, 8, 0, 10, 11}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulReluInt1) { - std::vector input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {0, -1, -2, -3, -4, -5, 6, 7, 8, 9, 10, 11}; - int in1_data[1] = {1}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_RELU; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulReluInt2) { - std::vector input0_shape{1}; - std::vector input1_shape{1, 2, 2, 3}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[1] = {1}; - int in1_data[12] = {0, -1, -2, -3, -4, -5, 6, 7, 8, 9, 10, 11}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_RELU; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulReluInt3) { - std::vector 
input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1, 2, 2, 3}; - bool broadcast = false; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - int in1_data[12] = {0, -1, -2, -3, -4, -5, 6, 7, 8, 9, 10, 11}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_RELU; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulRelu6Int0) { - std::vector input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1, 1, 1, 3}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - int in1_data[3] = {-1, 1, 1}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_RELU6; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 1, 2, 0, 4, 5, 0, 6, 6, 0, 6, 6}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulRelu6Int1) { - std::vector input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {0, -1, -2, -3, -4, -5, 6, 7, 8, 9, 10, 11}; - int in1_data[1] = {1}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_RELU6; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulRelu6Int2) { - std::vector input0_shape{1}; - std::vector input1_shape{1, 2, 2, 3}; - bool broadcast = true; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[1] = {1}; - int in1_data[12] = {0, -1, -2, -3, -4, -5, 6, 7, 8, 9, 10, 11}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_RELU6; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, MulRelu6Int3) { - std::vector input0_shape{1, 2, 2, 3}; - std::vector input1_shape{1, 2, 2, 3}; - bool broadcast = false; - std::vector output_shape{1, 2, 2, 3}; - int in0_data[12] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - int in1_data[12] = {0, -1, -2, -3, -4, -5, 6, 7, 8, 9, 10, 11}; - int out_data[12] = {0}; - schema::PrimitiveType type = schema::PrimitiveType_Mul; - int act_type = schema::ActivationType_RELU6; - int thread_num = 2; - desc_.type = type; - PrepareInt(input0_shape, input1_shape, broadcast, output_shape, in0_data, in1_data, out_data, type, act_type, - 
thread_num); - kernel_->Run(); - - int correct_data[12] = {0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6}; - - ASSERT_EQ(0, CompareOutputData(out_data, correct_data, 12, err_tol)); -} - -TEST_F(TestArithmeticTestFp32, AddReluFp32) { - std::vector inputs_tensor; - std::vector outputs_tensor; - - ArithmeticParameter add_param; - add_param.broadcasting_ = true; - add_param.op_parameter_.type_ = schema::PrimitiveType_Add; - add_param.ndim_ = 4; - add_param.activation_type_ = schema::ActivationType_RELU; - add_param.in_shape0_[0] = 1; - add_param.in_shape0_[1] = 2; - add_param.in_shape0_[2] = 3; - add_param.in_shape0_[3] = 4; - add_param.in_shape1_[0] = 1; - add_param.in_shape1_[1] = 1; - add_param.in_shape1_[2] = 1; - add_param.in_shape1_[3] = 4; - add_param.out_shape_[0] = 1; - add_param.out_shape_[1] = 2; - add_param.out_shape_[2] = 3; - add_param.out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector input0 = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, - 14.727955, -8.080715, 13.71383, 8.055829, 6.5845337, -9.25232, - -4.24519, 11.550042, 9.262012, 1.2780352, 6.7263746, -3.9301445, - 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - std::vector input0_shape = {1, 2, 3, 4}; - std::vector input1 = {0.9035316, 0.022212252, 0.3038014, 0.3478275}; - std::vector input1_shape = {1, 1, 1, 4}; - - lite::Tensor input0_tensor; - lite::Tensor input1_tensor; - input0_tensor.set_data_type(kNumberTypeFloat32); - input0_tensor.set_data(input0.data()); - input1_tensor.set_data(input1.data()); - input0_tensor.set_shape(input0_shape); - input1_tensor.set_shape(input1_shape); - inputs_tensor.push_back(&input0_tensor); - inputs_tensor.push_back(&input1_tensor); - - std::vector output(24); - std::vector output_shape = {1, 2, 3, 4}; - - lite::Tensor output0_tensor; - outputs_tensor.push_back(&output0_tensor); - output0_tensor.set_data(output.data()); - output0_tensor.set_shape(output_shape); - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - ASSERT_NE(creator, nullptr); - lite::InnerContext ctx; - ctx.thread_num_ = 3; - ASSERT_EQ(lite::RET_OK, ctx.Init()); - kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast(&add_param), &ctx, desc, nullptr); - ASSERT_NE(kernel, nullptr); - auto output_tensor_shape = output0_tensor.shape(); - kernel->Run(); - - std::vector correct_out = { - 13.119816, 3.368904, 15.631221, 5.5827856, 1.7079077, 9.9744, 15.031756, 0, 14.617362, 8.078041, 6.888335, 0, 0, - 11.572254, 9.565813, 1.6258626, 7.629906, 0, 4.0682936, 0, 0, 13.641247, 0, 3.548678}; - auto correct_out_ptr = correct_out.data(); - - ASSERT_EQ(0, CompareOutputData(output.data(), correct_out_ptr, 24, 0.00001)); - - input0_tensor.set_data(nullptr); - input1_tensor.set_data(nullptr); - output0_tensor.set_data(nullptr); -} - -TEST_F(TestArithmeticTestFp32, AddRelu6Fp32) { - std::vector inputs_tensor; - std::vector outputs_tensor; - - ArithmeticParameter add_param; - add_param.broadcasting_ = true; - add_param.op_parameter_.type_ = schema::PrimitiveType_Add; - add_param.ndim_ = 4; - add_param.activation_type_ = schema::ActivationType_RELU6; - add_param.in_shape0_[0] = 1; - add_param.in_shape0_[1] = 2; - add_param.in_shape0_[2] = 3; - add_param.in_shape0_[3] = 4; - add_param.in_shape1_[0] = 1; - add_param.in_shape1_[1] = 1; - add_param.in_shape1_[2] = 1; - add_param.in_shape1_[3] = 4; - add_param.out_shape_[0] = 1; - 
add_param.out_shape_[1] = 2; - add_param.out_shape_[2] = 3; - add_param.out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector input0 = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, - 14.727955, -8.080715, 13.71383, 8.055829, 6.5845337, -9.25232, - -4.24519, 11.550042, 9.262012, 1.2780352, 6.7263746, -3.9301445, - 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - std::vector input0_shape = {1, 2, 3, 4}; - std::vector input1 = {0.9035316, 0.022212252, 0.3038014, 0.3478275}; - std::vector input1_shape = {1, 1, 1, 4}; - - lite::Tensor input0_tensor; - lite::Tensor input1_tensor; - input0_tensor.set_data_type(kNumberTypeFloat32); - input0_tensor.set_data(input0.data()); - input1_tensor.set_data(input1.data()); - input0_tensor.set_shape(input0_shape); - input1_tensor.set_shape(input1_shape); - inputs_tensor.push_back(&input0_tensor); - inputs_tensor.push_back(&input1_tensor); - - std::vector output(24); - std::vector output_shape = {1, 2, 3, 4}; - - lite::Tensor output0_tensor; - outputs_tensor.push_back(&output0_tensor); - output0_tensor.set_data(output.data()); - output0_tensor.set_shape(output_shape); - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - ASSERT_NE(creator, nullptr); - lite::InnerContext ctx; - ctx.thread_num_ = 3; - ASSERT_EQ(lite::RET_OK, ctx.Init()); - kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast(&add_param), &ctx, desc, nullptr); - ASSERT_NE(kernel, nullptr); - auto output_tensor_shape = output0_tensor.shape(); - kernel->Run(); - - std::vector correct_out = {6, 3.368904, 6, 5.5827856, 1.7079077, 6, 6, 0, 6, 6, 6, 0, - 0, 6, 6, 1.6258626, 6, 0, 4.0682936, 0, 0, 6, 0, 3.548678}; - auto correct_out_ptr = correct_out.data(); - - ASSERT_EQ(0, CompareOutputData(output.data(), correct_out_ptr, 24, 0.00001)); - - input0_tensor.set_data(nullptr); - input1_tensor.set_data(nullptr); - output0_tensor.set_data(nullptr); -} - -TEST_F(TestArithmeticTestFp32, DivReluFp32) { - std::vector inputs_tensor; - std::vector outputs_tensor; - - ArithmeticParameter div_param; - div_param.broadcasting_ = true; - div_param.op_parameter_.type_ = schema::PrimitiveType_Div; - div_param.ndim_ = 4; - div_param.activation_type_ = schema::ActivationType_RELU; - div_param.in_shape0_[0] = 1; - div_param.in_shape0_[1] = 2; - div_param.in_shape0_[2] = 3; - div_param.in_shape0_[3] = 4; - div_param.in_shape1_[0] = 1; - div_param.in_shape1_[1] = 1; - div_param.in_shape1_[2] = 1; - div_param.in_shape1_[3] = 4; - div_param.out_shape_[0] = 1; - div_param.out_shape_[1] = 2; - div_param.out_shape_[2] = 3; - div_param.out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector input0 = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, - 14.727955, -8.080715, 13.71383, 8.055829, 6.5845337, -9.25232, - -4.24519, 11.550042, 9.262012, 1.2780352, 6.7263746, -3.9301445, - 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - std::vector input0_shape = {1, 2, 3, 4}; - std::vector input1 = {1.6771512, -7.336843, 0.6768286, 4.453379}; - std::vector input1_shape = {1, 1, 1, 4}; - - lite::Tensor input0_tensor; - lite::Tensor input1_tensor; - input0_tensor.set_data_type(kNumberTypeFloat32); - input0_tensor.set_data(input0.data()); - input1_tensor.set_data(input1.data()); - input0_tensor.set_shape(input0_shape); - input1_tensor.set_shape(input1_shape); - inputs_tensor.push_back(&input0_tensor); - 
inputs_tensor.push_back(&input1_tensor); - - std::vector output(24); - std::vector output_shape = {1, 2, 3, 4}; - - lite::Tensor output0_tensor; - outputs_tensor.push_back(&output0_tensor); - output0_tensor.set_data(output.data()); - output0_tensor.set_shape(output_shape); - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - ASSERT_NE(creator, nullptr); - lite::InnerContext ctx; - ctx.thread_num_ = 3; - ASSERT_EQ(lite::RET_OK, ctx.Init()); - kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast(&div_param), &ctx, desc, nullptr); - ASSERT_NE(kernel, nullptr); - auto output_tensor_shape = output0_tensor.shape(); - kernel->Run(); - - std::vector correct_out = {7.28394912, 0, 22.64593872, 1.17550247, 0.47960852, 0, - 21.76024329, 0, 8.17685967, 0, 9.72850985, 0, - 0, 0, 13.68442764, 0.28698101, 4.01059523, 0.53567243, - 5.56195764, 0, 0, 0, 0, 0.71874648}; - auto correct_out_ptr = correct_out.data(); - - ASSERT_EQ(0, CompareOutputData(output.data(), correct_out_ptr, 24, 0.00001)); - - input0_tensor.set_data(nullptr); - input1_tensor.set_data(nullptr); - output0_tensor.set_data(nullptr); -} - -TEST_F(TestArithmeticTestFp32, DivRelu6Fp32) { - std::vector inputs_tensor; - std::vector outputs_tensor; - - ArithmeticParameter div_param; - div_param.broadcasting_ = true; - div_param.op_parameter_.type_ = schema::PrimitiveType_Div; - div_param.ndim_ = 4; - div_param.activation_type_ = schema::ActivationType_RELU6; - div_param.in_shape0_[0] = 1; - div_param.in_shape0_[1] = 2; - div_param.in_shape0_[2] = 3; - div_param.in_shape0_[3] = 4; - div_param.in_shape1_[0] = 1; - div_param.in_shape1_[1] = 1; - div_param.in_shape1_[2] = 1; - div_param.in_shape1_[3] = 4; - div_param.out_shape_[0] = 1; - div_param.out_shape_[1] = 2; - div_param.out_shape_[2] = 3; - div_param.out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector input0 = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, - 14.727955, -8.080715, 13.71383, 8.055829, 6.5845337, -9.25232, - -4.24519, 11.550042, 9.262012, 1.2780352, 6.7263746, -3.9301445, - 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - std::vector input0_shape = {1, 2, 3, 4}; - std::vector input1 = {1.6771512, -7.336843, 0.6768286, 4.453379}; - std::vector input1_shape = {1, 1, 1, 4}; - - lite::Tensor input0_tensor; - lite::Tensor input1_tensor; - input0_tensor.set_data_type(kNumberTypeFloat32); - input0_tensor.set_data(input0.data()); - input1_tensor.set_data(input1.data()); - input0_tensor.set_shape(input0_shape); - input1_tensor.set_shape(input1_shape); - inputs_tensor.push_back(&input0_tensor); - inputs_tensor.push_back(&input1_tensor); - - std::vector output(24); - std::vector output_shape = {1, 2, 3, 4}; - - lite::Tensor output0_tensor; - outputs_tensor.push_back(&output0_tensor); - output0_tensor.set_data(output.data()); - output0_tensor.set_shape(output_shape); - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - ASSERT_NE(creator, nullptr); - lite::InnerContext ctx; - ctx.thread_num_ = 3; - ASSERT_EQ(lite::RET_OK, ctx.Init()); - kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast(&div_param), &ctx, desc, nullptr); - ASSERT_NE(kernel, nullptr); - auto output_tensor_shape = output0_tensor.shape(); - kernel->Run(); - - 
std::vector correct_out = {6, 0, 6, 1.17550247, 0.47960852, 0, 6, 0, 6, 0, 6, 0, - 0, 0, 6, 0.28698101, 4.01059523, 0.53567243, 5.56195764, 0, 0, 0, 0, 0.71874648}; - auto correct_out_ptr = correct_out.data(); - - ASSERT_EQ(0, CompareOutputData(output.data(), correct_out_ptr, 24, 0.00001)); - - input0_tensor.set_data(nullptr); - input1_tensor.set_data(nullptr); - output0_tensor.set_data(nullptr); -} - -TEST_F(TestArithmeticTestFp32, EqualFp32) { - std::vector inputs_tensor; - std::vector outputs_tensor; - - ArithmeticParameter equal_param; - equal_param.broadcasting_ = true; - equal_param.op_parameter_.type_ = schema::PrimitiveType_Equal; - equal_param.ndim_ = 4; - equal_param.in_shape0_[0] = 1; - equal_param.in_shape0_[1] = 2; - equal_param.in_shape0_[2] = 3; - equal_param.in_shape0_[3] = 4; - equal_param.in_shape1_[0] = 1; - equal_param.in_shape1_[1] = 1; - equal_param.in_shape1_[2] = 1; - equal_param.in_shape1_[3] = 4; - equal_param.out_shape_[0] = 1; - equal_param.out_shape_[1] = 2; - equal_param.out_shape_[2] = 3; - equal_param.out_shape_[3] = 4; - - /* 1x2x3x4 NHWC */ - std::vector input0 = {12.216284, 3.3466918, 15.327419, 5.234958, 0.804376, 9.952188, - 14.727955, -8.080715, 13.71383, 8.055829, 6.5845337, -9.25232, - -4.24519, 11.550042, 9.262012, 1.2780352, 6.7263746, -3.9301445, - 3.764492, -8.602078, -3.3558068, 13.619035, -2.6694393, 3.2008505}; - std::vector input0_shape = {1, 2, 3, 4}; - std::vector input1 = {0.16771512, 3.3466918, 0.6768286, 3.2008505}; - std::vector input1_shape = {1, 1, 1, 4}; - - lite::Tensor input0_tensor; - lite::Tensor input1_tensor; - input0_tensor.set_data_type(kNumberTypeFloat32); - input0_tensor.set_data(input0.data()); - input1_tensor.set_data(input1.data()); - input0_tensor.set_shape(input0_shape); - input1_tensor.set_shape(input1_shape); - inputs_tensor.push_back(&input0_tensor); - inputs_tensor.push_back(&input1_tensor); - - std::vector output(24); - std::vector output_shape = {1, 2, 3, 4}; - - lite::Tensor output0_tensor; - outputs_tensor.push_back(&output0_tensor); - output0_tensor.set_data(output.data()); - output0_tensor.set_shape(output_shape); - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Eltwise}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - ASSERT_NE(creator, nullptr); - lite::InnerContext ctx; - ctx.thread_num_ = 3; - ASSERT_EQ(lite::RET_OK, ctx.Init()); - kernel::LiteKernel *kernel = - creator(inputs_tensor, outputs_tensor, reinterpret_cast(&equal_param), &ctx, desc, nullptr); - ASSERT_NE(kernel, nullptr); - auto output_tensor_shape = output0_tensor.shape(); - kernel->Run(); - - std::vector correct_out = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; - auto correct_out_ptr = correct_out.data(); - - ASSERT_EQ(0, CompareOutputData(output.data(), correct_out_ptr, 24, 0.00001)); - - input0_tensor.set_data(nullptr); - input1_tensor.set_data(nullptr); - output0_tensor.set_data(nullptr); -} -} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc index 4e73ad62d8..4e52063652 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc @@ -16,7 +16,7 @@ #include "src/common/log_adapter.h" #include "common/common_test.h" #include "mindspore/lite/nnacl/batch_to_space.h" -#include 
"mindspore/lite/nnacl/arithmetic_common.h" +#include "mindspore/lite/nnacl/common_func.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc index e280b04a19..0578b2bd6d 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc @@ -16,7 +16,7 @@ #include "src/common/log_adapter.h" #include "common/common_test.h" #include "mindspore/lite/nnacl/depth_to_space.h" -#include "mindspore/lite/nnacl/arithmetic_common.h" +#include "mindspore/lite/nnacl/common_func.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/bias_add_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/bias_add_int8_tests.cc deleted file mode 100644 index e1ac3b57f9..0000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/bias_add_int8_tests.cc +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "schema/inner/model_generated.h" -#include "common/common_test.h" -#include "mindspore/lite/src/runtime/kernel/arm/int8/bias_add_int8.h" -#include "mindspore/lite/src/kernel_registry.h" - -using mindspore::lite::DeviceType; - -namespace mindspore { -class TestBiasAddInt8 : public mindspore::CommonTest { - public: - TestBiasAddInt8() {} -}; - -TEST_F(TestBiasAddInt8, BiasAdd) { - lite::Tensor in_tensor0(kNumberTypeInt8, {1, 2, 3, 2}); - lite::Tensor in_tensor1(kNumberTypeInt8, {2}); - lite::Tensor out_tensor(kNumberTypeInt8, {1, 2, 3, 2}); - int8_t input_data0[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - int8_t input_data1[] = {1, 1}; - int8_t output_data[12] = {0}; - in_tensor0.set_data(input_data0); - in_tensor1.set_data(input_data1); - out_tensor.set_data(output_data); - std::vector inputs = {&in_tensor0, &in_tensor1}; - std::vector outputs = {&out_tensor}; - - ArithmeticParameter parameter = {}; - int dims[] = {1, 2, 3, 4}; - parameter.ndim_ = 4; - for (int i = 0; i < 4; i++) { - parameter.in_shape0_[i] = dims[i]; - parameter.in_shape1_[i] = 1; - parameter.out_shape_[i] = dims[i]; - } - parameter.in_shape1_[3] = dims[3]; - - kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_BiasAdd}; - - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - EXPECT_NE(creator, nullptr); - - auto ctx = std::make_shared(); - ASSERT_EQ(lite::RET_OK, ctx->Init()); - auto kernel = creator(inputs, outputs, reinterpret_cast(¶meter), ctx.get(), desc, nullptr); - EXPECT_NE(kernel, nullptr); - - auto ret = kernel->Run(); - EXPECT_EQ(0, ret); - - float expect[] = {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}; - for (int i = 0; i < 12; ++i) { - EXPECT_EQ(output_data[i], expect[i]); - } - - in_tensor0.set_data(nullptr); - in_tensor1.set_data(nullptr); - 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
index ef924c9b12..87bad0fdf2 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "ut/src/runtime/kernel/opencl/common.h"
-#include "nnacl/arithmetic_common.h"
+#include "nnacl/arithmetic.h"
 
 namespace mindspore::lite::opencl::test {