!11956 [MSLITE]remove mul/add/div/sub from arithmetic_fp32

From: @wangchengyuan
Reviewed-by: @zhang_xue_tong
Signed-off-by: @zhang_xue_tong
pull/11956/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 2575310c12

@ -0,0 +1,225 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/add_fp32.h"
#include "nnacl/fp32/arithmetic_fp32.h"
int ElementOptAdd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
#ifdef ENABLE_NEON
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vaddq_f32(vin0_opt, vin1);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[0] + in1[index];
}
} else {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vout = vaddq_f32(vin0, vin1_opt);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[index] + in1[0];
}
}
return NNACL_OK;
}
int ElementOptAddInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) {
#ifdef ENABLE_NEON
int32x4_t vin0_opt = vdupq_n_s32(in0[0]);
int32x4_t vin1_opt = vdupq_n_s32(in1[0]);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
int32x4_t vin1 = vld1q_s32(in1 + index);
int32x4_t vout = vaddq_s32(vin0_opt, vin1);
vst1q_s32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[0] + in1[index];
}
} else {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
int32x4_t vin0 = vld1q_s32(in0 + index);
int32x4_t vout = vaddq_s32(vin0, vin1_opt);
vst1q_s32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[index] + in1[0];
}
}
return NNACL_OK;
}
int ElementOptAddRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
#ifdef ENABLE_NEON
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
float32x4_t zeros = vdupq_n_f32(0.0f);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vmaxq_f32(vaddq_f32(vin0_opt, vin1), zeros);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMAX(in0[0] + in1[index], 0);
}
} else {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vout = vmaxq_f32(vaddq_f32(vin0, vin1_opt), zeros);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMAX(in0[index] + in1[0], 0);
}
}
return NNACL_OK;
}
int ElementOptAddRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
#ifdef ENABLE_NEON
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
float32x4_t zeros = vdupq_n_f32(0.0f);
float32x4_t bounds = vdupq_n_f32(6.0f);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0_opt, vin1), zeros), bounds);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMIN(MSMAX(in0[0] + in1[index], 0), 6);
}
} else {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0, vin1_opt), zeros), bounds);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMIN(MSMAX(in0[index] + in1[0], 0), 6);
}
}
return NNACL_OK;
}
int BroadcastAdd(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param) {
TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param);
return ElementAdd(tile_in0, tile_in1, out, size);
}
int ElementAdd(const float *in0, const float *in1, float *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vaddq_f32(vin0, vin1);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[index] + in1[index];
}
return NNACL_OK;
}
int ElementAddRelu(const float *in0, const float *in1, float *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
float32x4_t zeros = vdupq_n_f32(0.0f);
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vaddq_f32(vin0, vin1);
vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
float res = in0[index] + in1[index];
out[index] = res > 0 ? res : 0;
}
return NNACL_OK;
}
int ElementAddRelu6(const float *in0, const float *in1, float *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
float32x4_t zeros = vdupq_n_f32(0.0f);
float32x4_t bounds = vdupq_n_f32(6.0f);
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0, vin1), zeros), bounds);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMIN(MSMAX(in0[index] + in1[index], 0), 6);
}
return NNACL_OK;
}
int ElementAddInt(const int *in0, const int *in1, int *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
int32x4_t vin0 = vld1q_s32(in0 + index);
int32x4_t vin1 = vld1q_s32(in1 + index);
int32x4_t vout = vaddq_s32(vin0, vin1);
vst1q_s32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[index] + in1[index];
}
return NNACL_OK;
}

@ -0,0 +1,45 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_ADD_H_
#define MINDSPORE_LITE_NNACL_FP32_ADD_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
#include "nnacl/base/arithmetic_base.h"
#include "nnacl/errorcode.h"
#ifdef __cplusplus
extern "C" {
#endif
int ElementAdd(const float *in0, const float *in1, float *out, int size);
int ElementAddRelu(const float *in0, const float *in1, float *out, int size);
int ElementAddRelu6(const float *in0, const float *in1, float *out, int size);
int ElementAddInt(const int *in0, const int *in1, int *out, int size);
int ElementOptAdd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptAddInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int ElementOptAddRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptAddRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int BroadcastAdd(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_FP32_ADD_H_

File diff suppressed because it is too large Load Diff

@ -22,6 +22,11 @@
#include "nnacl/op_base.h"
#include "nnacl/base/arithmetic_base.h"
#include "nnacl/errorcode.h"
#include "nnacl/fp32/add_fp32.h"
#include "nnacl/fp32/mul_fp32.h"
#include "nnacl/fp32/div_fp32.h"
#include "nnacl/fp32/sub_fp32.h"
#include "nnacl/fp32/squared_difference.h"
#ifdef __cplusplus
extern "C" {
@ -30,56 +35,6 @@ void TileOneDimensionFp32(const float *inData, float *outData, int dim, size_t n
const int *inStrides, const int *outStrides, const int *multiple);
void TileDimensionsFp32(const float *data0, const float *data1, float *tile_data0, float *tile_data1,
ArithmeticParameter *param);
/* Mul */
int ElementMul(const float *in0, const float *in1, float *out, int size);
int ElementMulRelu(const float *in0, const float *in1, float *out, int size);
int ElementMulRelu6(const float *in0, const float *in1, float *out, int size);
int ElementMulInt(const int *in0, const int *in1, int *out, int size);
int ElementMulReluInt(const int *in0, const int *in1, int *out, int size);
int ElementMulRelu6Int(const int *in0, const int *in1, int *out, int size);
int ElementOptMul(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptMulRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptMulRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptMulInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int ElementOptMulReluInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int ElementOptMulRelu6Int(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int BroadcastMul(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param);
/* Add */
int ElementAdd(const float *in0, const float *in1, float *out, int size);
int ElementAddRelu(const float *in0, const float *in1, float *out, int size);
int ElementAddRelu6(const float *in0, const float *in1, float *out, int size);
int ElementAddInt(const int *in0, const int *in1, int *out, int size);
int ElementOptAdd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptAddInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int ElementOptAddRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptAddRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int BroadcastAdd(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param);
/* Sub */
int ElementSub(const float *in0, const float *in1, float *out, int size);
int ElementSubInt(const int *in0, const int *in1, int *out, int size);
int ElementSubRelu(const float *in0, const float *in1, float *out, int size);
int ElementSubRelu6(const float *in0, const float *in1, float *out, int size);
int ElementOptSub(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptSubRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptSubRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptSubInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
/* Div */
int ElementDiv(const float *in0, const float *in1, float *out, int size);
int ElementDivRelu(const float *in0, const float *in1, float *out, int size);
int ElementDivRelu6(const float *in0, const float *in1, float *out, int size);
int ElementOptDiv(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptDivRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptDivRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptDivInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int BroadcastDiv(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param);
/* logical and */
int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size);
int ElementLogicalAndInt(const int *in0, const int *in1, int *out, int size);
@ -88,9 +43,6 @@ int ElementLogicalAndBool(const bool *in0, const bool *in1, bool *out, int size)
/* logical or */
int ElementLogicalOr(const float *in0, const float *in1, float *out, int size);
/* Element Squared Difference */
int ElementSquaredDifference(const float *in0, const float *in1, float *out, int size);
/* max min */
int ElementMaximum(const float *in0, const float *in1, float *out, int size);
int ElementMinimum(const float *in0, const float *in1, float *out, int size);

@ -0,0 +1,107 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/div_fp32.h"
#include <math.h>
#include "nnacl/fp32/arithmetic_fp32.h"
int ElementOptDiv(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < size; index++) {
out[index] = in0[0] / in1[index];
}
} else {
if (in1[0] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
for (int index = 0; index < size; index++) {
out[index] = in0[index] / in1[0];
}
}
return NNACL_OK;
}
int ElementOptDivRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < size; index++) {
out[index] = in0[0] / in1[index];
out[index] = out[index] > 0 ? out[index] : 0;
}
} else {
for (int index = 0; index < size; index++) {
out[index] = in0[index] / in1[0];
out[index] = out[index] > 0 ? out[index] : 0;
}
}
return NNACL_OK;
}
int ElementOptDivRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < size; index++) {
out[index] = MSMIN(MSMAX(in0[0] / in1[index], 0), 6);
}
} else {
for (int index = 0; index < size; index++) {
out[index] = MSMIN(MSMAX(in0[index] / in1[0], 0), 6);
}
}
return NNACL_OK;
}
int ElementOptDivInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) {
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < size; index++) {
out[index] = in0[0] / in1[index];
}
} else {
if (in1[0] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
for (int index = 0; index < size; index++) {
out[index] = in0[index] / in1[0];
}
}
return NNACL_OK;
}
int BroadcastDiv(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param) {
TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param);
return ElementDiv(tile_in0, tile_in1, out, size);
}
int ElementDiv(const float *in0, const float *in1, float *out, int size) {
for (int i = 0; i < size; i++) {
out[i] = in0[i] / in1[i];
}
return NNACL_OK;
}
int ElementDivRelu(const float *in0, const float *in1, float *out, int size) {
for (int i = 0; i < size; i++) {
float res = in0[i] / in1[i];
out[i] = res > 0 ? res : 0;
}
return NNACL_OK;
}
int ElementDivRelu6(const float *in0, const float *in1, float *out, int size) {
for (int i = 0; i < size; i++) {
out[i] = MSMIN(MSMAX(in0[i] / in1[i], 0), 6);
}
return NNACL_OK;
}

@ -0,0 +1,43 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_DIV_H_
#define MINDSPORE_LITE_NNACL_FP32_DIV_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
#include "nnacl/base/arithmetic_base.h"
#include "nnacl/errorcode.h"
#ifdef __cplusplus
extern "C" {
#endif
int ElementDiv(const float *in0, const float *in1, float *out, int size);
int ElementDivRelu(const float *in0, const float *in1, float *out, int size);
int ElementDivRelu6(const float *in0, const float *in1, float *out, int size);
int ElementOptDiv(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptDivRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptDivRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptDivInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int BroadcastDiv(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_FP32_DIV_H_

@ -19,6 +19,7 @@
#include <float.h>
#include "nnacl/fp32/activation_fp32.h"
#include "nnacl/fp32/arithmetic_fp32.h"
#include "nnacl/fp32/mul_fp32.h"
void InitGate(float *gate_buffer, const float *bias, const LstmParameter *lstm_parm) {
int gate_offest = 0;

File diff suppressed because it is too large Load Diff

@ -0,0 +1,49 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_MUL_H_
#define MINDSPORE_LITE_NNACL_FP32_MUL_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
#include "nnacl/base/arithmetic_base.h"
#include "nnacl/errorcode.h"
#ifdef __cplusplus
extern "C" {
#endif
int ElementMul(const float *in0, const float *in1, float *out, int size);
int ElementMulRelu(const float *in0, const float *in1, float *out, int size);
int ElementMulRelu6(const float *in0, const float *in1, float *out, int size);
int ElementMulInt(const int *in0, const int *in1, int *out, int size);
int ElementMulReluInt(const int *in0, const int *in1, int *out, int size);
int ElementMulRelu6Int(const int *in0, const int *in1, int *out, int size);
int ElementOptMul(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptMulRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptMulRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptMulInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int ElementOptMulReluInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int ElementOptMulRelu6Int(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
int BroadcastMul(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_FP32_MUL_H_

@ -0,0 +1,28 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_
#define MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_
#include "nnacl/fp32/squared_difference.h"
#include "nnacl/fp32/sub_fp32.h"
#include "nnacl/fp32/mul_fp32.h"
int ElementSquaredDifference(const float *in0, const float *in1, float *out, int size) {
ElementSub(in0, in1, out, size);
return ElementMul(out, out, out, size);
}
#endif // MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_

@ -0,0 +1,37 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_
#define MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
#include "nnacl/base/arithmetic_base.h"
#include "nnacl/errorcode.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Element Squared Difference */
int ElementSquaredDifference(const float *in0, const float *in1, float *out, int size);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_

@ -0,0 +1,217 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/sub_fp32.h"
int ElementOptSub(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
#ifdef ENABLE_NEON
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vsubq_f32(vin0_opt, vin1);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[0] - in1[index];
}
} else {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vout = vsubq_f32(vin0, vin1_opt);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[index] - in1[0];
}
}
return NNACL_OK;
}
int ElementOptSubInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) {
#ifdef ENABLE_NEON
int32x4_t vin0_opt = vdupq_n_s32(in0[0]);
int32x4_t vin1_opt = vdupq_n_s32(in1[0]);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
int32x4_t vin1 = vld1q_s32(in1 + index);
int32x4_t vout = vsubq_s32(vin0_opt, vin1);
vst1q_s32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[0] - in1[index];
}
} else {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
int32x4_t vin0 = vld1q_s32(in0 + index);
int32x4_t vout = vsubq_s32(vin0, vin1_opt);
vst1q_s32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[index] - in1[0];
}
}
return NNACL_OK;
}
int ElementOptSubRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
#ifdef ENABLE_NEON
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
float32x4_t zeros = vdupq_n_f32(0.0f);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vmaxq_f32(vsubq_f32(vin0_opt, vin1), zeros);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMAX(in0[0] - in1[index], 0);
}
} else {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vout = vmaxq_f32(vsubq_f32(vin0, vin1_opt), zeros);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMAX(in0[index] - in1[0], 0);
}
}
return NNACL_OK;
}
int ElementOptSubRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
#ifdef ENABLE_NEON
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
float32x4_t zeros = vdupq_n_f32(0.0f);
float32x4_t bounds = vdupq_n_f32(6.0f);
#endif
int index = 0;
if (param->in_elements_num0_ == 1) {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0_opt, vin1), zeros), bounds);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMIN(MSMAX(in0[0] - in1[index], 0), 6);
}
} else {
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0, vin1_opt), zeros), bounds);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMIN(MSMAX(in0[index] - in1[0], 0), 6);
}
}
return NNACL_OK;
}
int ElementSub(const float *in0, const float *in1, float *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vsubq_f32(vin0, vin1);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[index] - in1[index];
}
return NNACL_OK;
}
int ElementSubInt(const int *in0, const int *in1, int *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
for (; index <= size - 4; index += C4NUM) {
int32x4_t vin0 = vld1q_s32(in0 + index);
int32x4_t vin1 = vld1q_s32(in1 + index);
int32x4_t vout = vsubq_s32(vin0, vin1);
vst1q_s32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = in0[index] - in1[index];
}
return NNACL_OK;
}
int ElementSubRelu(const float *in0, const float *in1, float *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
float32x4_t zeros = vdupq_n_f32(0.0f);
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vsubq_f32(vin0, vin1);
vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
float res = in0[index] - in1[index];
out[index] = res > 0 ? res : 0;
}
return NNACL_OK;
}
int ElementSubRelu6(const float *in0, const float *in1, float *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
float32x4_t zeros = vdupq_n_f32(0.0f);
float32x4_t bounds = vdupq_n_f32(6.0f);
for (; index <= size - 4; index += C4NUM) {
float32x4_t vin0 = vld1q_f32(in0 + index);
float32x4_t vin1 = vld1q_f32(in1 + index);
float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0, vin1), zeros), bounds);
vst1q_f32(out + index, vout);
}
#endif
for (; index < size; index++) {
out[index] = MSMIN(MSMAX(in0[index] - in1[index], 0), 6);
}
return NNACL_OK;
}

@ -0,0 +1,43 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_SUB_FP32_H_
#define MINDSPORE_LITE_NNACL_SUB_FP32_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
#include "nnacl/base/arithmetic_base.h"
#include "nnacl/errorcode.h"
#ifdef __cplusplus
extern "C" {
#endif
int ElementSub(const float *in0, const float *in1, float *out, int size);
int ElementSubInt(const int *in0, const int *in1, int *out, int size);
int ElementSubRelu(const float *in0, const float *in1, float *out, int size);
int ElementSubRelu6(const float *in0, const float *in1, float *out, int size);
int ElementOptSub(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptSubRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptSubRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptSubInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_SUB_FP32_H_
Loading…
Cancel
Save