!11956 [MSLITE]remove mul/add/div/sub from arithmetic_fp32
From: @wangchengyuan Reviewed-by: @zhang_xue_tong Signed-off-by: @zhang_xue_tong pull/11956/MERGE
commit
2575310c12
@ -0,0 +1,225 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp32/add_fp32.h"
|
||||
#include "nnacl/fp32/arithmetic_fp32.h"
|
||||
|
||||
int ElementOptAdd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
|
||||
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
|
||||
#endif
|
||||
int index = 0;
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vaddq_f32(vin0_opt, vin1);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[0] + in1[index];
|
||||
}
|
||||
} else {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vout = vaddq_f32(vin0, vin1_opt);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[index] + in1[0];
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptAddInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) {
|
||||
#ifdef ENABLE_NEON
|
||||
int32x4_t vin0_opt = vdupq_n_s32(in0[0]);
|
||||
int32x4_t vin1_opt = vdupq_n_s32(in1[0]);
|
||||
#endif
|
||||
int index = 0;
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
int32x4_t vin1 = vld1q_s32(in1 + index);
|
||||
int32x4_t vout = vaddq_s32(vin0_opt, vin1);
|
||||
vst1q_s32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[0] + in1[index];
|
||||
}
|
||||
} else {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
int32x4_t vin0 = vld1q_s32(in0 + index);
|
||||
int32x4_t vout = vaddq_s32(vin0, vin1_opt);
|
||||
vst1q_s32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[index] + in1[0];
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptAddRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
|
||||
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
|
||||
float32x4_t zeros = vdupq_n_f32(0.0f);
|
||||
#endif
|
||||
int index = 0;
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vmaxq_f32(vaddq_f32(vin0_opt, vin1), zeros);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMAX(in0[0] + in1[index], 0);
|
||||
}
|
||||
} else {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vout = vmaxq_f32(vaddq_f32(vin0, vin1_opt), zeros);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMAX(in0[index] + in1[0], 0);
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptAddRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
|
||||
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
|
||||
float32x4_t zeros = vdupq_n_f32(0.0f);
|
||||
float32x4_t bounds = vdupq_n_f32(6.0f);
|
||||
#endif
|
||||
int index = 0;
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0_opt, vin1), zeros), bounds);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMIN(MSMAX(in0[0] + in1[index], 0), 6);
|
||||
}
|
||||
} else {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0, vin1_opt), zeros), bounds);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMIN(MSMAX(in0[index] + in1[0], 0), 6);
|
||||
}
|
||||
}
|
||||
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastAdd(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
|
||||
ArithmeticParameter *param) {
|
||||
TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param);
|
||||
return ElementAdd(tile_in0, tile_in1, out, size);
|
||||
}
|
||||
|
||||
int ElementAdd(const float *in0, const float *in1, float *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vaddq_f32(vin0, vin1);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[index] + in1[index];
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementAddRelu(const float *in0, const float *in1, float *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t zeros = vdupq_n_f32(0.0f);
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vaddq_f32(vin0, vin1);
|
||||
vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
float res = in0[index] + in1[index];
|
||||
out[index] = res > 0 ? res : 0;
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementAddRelu6(const float *in0, const float *in1, float *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t zeros = vdupq_n_f32(0.0f);
|
||||
float32x4_t bounds = vdupq_n_f32(6.0f);
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vminq_f32(vmaxq_f32(vaddq_f32(vin0, vin1), zeros), bounds);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMIN(MSMAX(in0[index] + in1[index], 0), 6);
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementAddInt(const int *in0, const int *in1, int *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
int32x4_t vin0 = vld1q_s32(in0 + index);
|
||||
int32x4_t vin1 = vld1q_s32(in1 + index);
|
||||
int32x4_t vout = vaddq_s32(vin0, vin1);
|
||||
vst1q_s32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[index] + in1[index];
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
@ -0,0 +1,45 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP32_ADD_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP32_ADD_H_
|
||||
|
||||
#ifdef ENABLE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/base/arithmetic_base.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Element-wise add: out[i] = in0[i] + in1[i] for i in [0, size). Returns NNACL_OK. */
int ElementAdd(const float *in0, const float *in1, float *out, int size);
|
||||
/* Element-wise add + ReLU: sums that are not greater than zero are stored as 0. Returns NNACL_OK. */
int ElementAddRelu(const float *in0, const float *in1, float *out, int size);
|
||||
/* Element-wise add + ReLU6: out[i] = min(max(in0[i] + in1[i], 0), 6). Returns NNACL_OK. */
int ElementAddRelu6(const float *in0, const float *in1, float *out, int size);
|
||||
/* Element-wise int add: out[i] = in0[i] + in1[i] for i in [0, size). Returns NNACL_OK. */
int ElementAddInt(const int *in0, const int *in1, int *out, int size);
|
||||
/* Broadcast add: in0[0] + in1[i] when param->in_elements_num0_ == 1, else in0[i] + in1[0]. Returns NNACL_OK. */
int ElementOptAdd(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/* Int broadcast add: in0[0] + in1[i] when param->in_elements_num0_ == 1, else in0[i] + in1[0]. Returns NNACL_OK. */
int ElementOptAddInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
|
||||
/* Broadcast add (operand chosen by param->in_elements_num0_ == 1) followed by max(result, 0). Returns NNACL_OK. */
int ElementOptAddRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/* Broadcast add (operand chosen by param->in_elements_num0_ == 1), result limited to [0, 6]. Returns NNACL_OK. */
int ElementOptAddRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/* Runs TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param), then ElementAdd(tile_in0, tile_in1, out, size). */
int BroadcastAdd(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
|
||||
ArithmeticParameter *param);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_FP32_ADD_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,107 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp32/div_fp32.h"
|
||||
#include <math.h>
|
||||
#include "nnacl/fp32/arithmetic_fp32.h"
|
||||
|
||||
int ElementOptDiv(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
for (int index = 0; index < size; index++) {
|
||||
out[index] = in0[0] / in1[index];
|
||||
}
|
||||
} else {
|
||||
if (in1[0] == 0) {
|
||||
return NNACL_ERRCODE_DIVISOR_ZERO;
|
||||
}
|
||||
for (int index = 0; index < size; index++) {
|
||||
out[index] = in0[index] / in1[0];
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptDivRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
for (int index = 0; index < size; index++) {
|
||||
out[index] = in0[0] / in1[index];
|
||||
out[index] = out[index] > 0 ? out[index] : 0;
|
||||
}
|
||||
} else {
|
||||
for (int index = 0; index < size; index++) {
|
||||
out[index] = in0[index] / in1[0];
|
||||
out[index] = out[index] > 0 ? out[index] : 0;
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptDivRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
for (int index = 0; index < size; index++) {
|
||||
out[index] = MSMIN(MSMAX(in0[0] / in1[index], 0), 6);
|
||||
}
|
||||
} else {
|
||||
for (int index = 0; index < size; index++) {
|
||||
out[index] = MSMIN(MSMAX(in0[index] / in1[0], 0), 6);
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptDivInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) {
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
for (int index = 0; index < size; index++) {
|
||||
out[index] = in0[0] / in1[index];
|
||||
}
|
||||
} else {
|
||||
if (in1[0] == 0) {
|
||||
return NNACL_ERRCODE_DIVISOR_ZERO;
|
||||
}
|
||||
for (int index = 0; index < size; index++) {
|
||||
out[index] = in0[index] / in1[0];
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int BroadcastDiv(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
|
||||
ArithmeticParameter *param) {
|
||||
TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param);
|
||||
return ElementDiv(tile_in0, tile_in1, out, size);
|
||||
}
|
||||
|
||||
int ElementDiv(const float *in0, const float *in1, float *out, int size) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
out[i] = in0[i] / in1[i];
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementDivRelu(const float *in0, const float *in1, float *out, int size) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
float res = in0[i] / in1[i];
|
||||
out[i] = res > 0 ? res : 0;
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementDivRelu6(const float *in0, const float *in1, float *out, int size) {
|
||||
for (int i = 0; i < size; i++) {
|
||||
out[i] = MSMIN(MSMAX(in0[i] / in1[i], 0), 6);
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP32_DIV_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP32_DIV_H_
|
||||
|
||||
#ifdef ENABLE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/base/arithmetic_base.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/* Element-wise divide: out[i] = in0[i] / in1[i] for i in [0, size); divisors are not checked. Returns NNACL_OK. */
int ElementDiv(const float *in0, const float *in1, float *out, int size);
|
||||
/* Element-wise divide + ReLU: quotients that are not greater than zero are stored as 0. Returns NNACL_OK. */
int ElementDivRelu(const float *in0, const float *in1, float *out, int size);
|
||||
/* Element-wise divide + ReLU6: out[i] = min(max(in0[i] / in1[i], 0), 6). Returns NNACL_OK. */
int ElementDivRelu6(const float *in0, const float *in1, float *out, int size);
|
||||
/*
 * Broadcast divide: in0[0] / in1[i] when param->in_elements_num0_ == 1, else in0[i] / in1[0].
 * Returns NNACL_ERRCODE_DIVISOR_ZERO when the broadcast divisor in1[0] is zero, NNACL_OK otherwise.
 */
int ElementOptDiv(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/* Broadcast divide (operand chosen by param->in_elements_num0_ == 1) + ReLU; no zero-divisor check. Returns NNACL_OK. */
int ElementOptDivRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/* Broadcast divide (operand chosen by param->in_elements_num0_ == 1), result limited to [0, 6]. Returns NNACL_OK. */
int ElementOptDivRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/*
 * Int broadcast divide: in0[0] / in1[i] when param->in_elements_num0_ == 1, else in0[i] / in1[0].
 * Returns NNACL_ERRCODE_DIVISOR_ZERO when the broadcast divisor in1[0] is zero.
 */
int ElementOptDivInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
|
||||
/* Runs TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param), then ElementDiv(tile_in0, tile_in1, out, size). */
int BroadcastDiv(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
|
||||
ArithmeticParameter *param);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_FP32_DIV_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP32_MUL_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP32_MUL_H_
|
||||
|
||||
#ifdef ENABLE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/base/arithmetic_base.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int ElementMul(const float *in0, const float *in1, float *out, int size);
|
||||
int ElementMulRelu(const float *in0, const float *in1, float *out, int size);
|
||||
int ElementMulRelu6(const float *in0, const float *in1, float *out, int size);
|
||||
int ElementMulInt(const int *in0, const int *in1, int *out, int size);
|
||||
int ElementMulReluInt(const int *in0, const int *in1, int *out, int size);
|
||||
int ElementMulRelu6Int(const int *in0, const int *in1, int *out, int size);
|
||||
int ElementOptMul(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
int ElementOptMulRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
int ElementOptMulRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
int ElementOptMulInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
|
||||
int ElementOptMulReluInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
|
||||
int ElementOptMulRelu6Int(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
|
||||
int BroadcastMul(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
|
||||
ArithmeticParameter *param);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_FP32_MUL_H_
|
@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/*
 * No include guard here: this is a translation unit, not a header. Defining
 * MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_ in this file would make the
 * include of squared_difference.h below expand to nothing, so the definition
 * would never be checked against its own prototype.
 */
#include "nnacl/fp32/squared_difference.h"
#include "nnacl/fp32/sub_fp32.h"
#include "nnacl/fp32/mul_fp32.h"

/*
 * Element-wise squared difference of two float arrays of length size.
 *
 * out is first filled with in0[i] - in1[i] by ElementSub and is then passed as
 * both operands (and as the destination) to ElementMul, i.e. the difference
 * is multiplied by itself in place.
 *
 * Returns the status of ElementSub if it is not NNACL_OK, otherwise the status
 * of ElementMul.
 */
int ElementSquaredDifference(const float *in0, const float *in1, float *out, int size) {
  int ret = ElementSub(in0, in1, out, size);
  if (ret != NNACL_OK) {
    return ret;
  }
  return ElementMul(out, out, out, size);
}
|
@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_
|
||||
#define MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_
|
||||
|
||||
#ifdef ENABLE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/base/arithmetic_base.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Element-wise squared difference: ElementSub(in0, in1) into out, then out multiplied by itself via ElementMul. */
|
||||
int ElementSquaredDifference(const float *in0, const float *in1, float *out, int size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_SQUARED_DIFFERENCE_H_
|
@ -0,0 +1,217 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "nnacl/fp32/sub_fp32.h"
|
||||
|
||||
int ElementOptSub(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
|
||||
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
|
||||
#endif
|
||||
int index = 0;
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vsubq_f32(vin0_opt, vin1);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[0] - in1[index];
|
||||
}
|
||||
} else {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vout = vsubq_f32(vin0, vin1_opt);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[index] - in1[0];
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptSubInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param) {
|
||||
#ifdef ENABLE_NEON
|
||||
int32x4_t vin0_opt = vdupq_n_s32(in0[0]);
|
||||
int32x4_t vin1_opt = vdupq_n_s32(in1[0]);
|
||||
#endif
|
||||
int index = 0;
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
int32x4_t vin1 = vld1q_s32(in1 + index);
|
||||
int32x4_t vout = vsubq_s32(vin0_opt, vin1);
|
||||
vst1q_s32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[0] - in1[index];
|
||||
}
|
||||
} else {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
int32x4_t vin0 = vld1q_s32(in0 + index);
|
||||
int32x4_t vout = vsubq_s32(vin0, vin1_opt);
|
||||
vst1q_s32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[index] - in1[0];
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptSubRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
|
||||
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
|
||||
float32x4_t zeros = vdupq_n_f32(0.0f);
|
||||
#endif
|
||||
int index = 0;
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vmaxq_f32(vsubq_f32(vin0_opt, vin1), zeros);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMAX(in0[0] - in1[index], 0);
|
||||
}
|
||||
} else {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vout = vmaxq_f32(vsubq_f32(vin0, vin1_opt), zeros);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMAX(in0[index] - in1[0], 0);
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementOptSubRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param) {
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t vin0_opt = vdupq_n_f32(in0[0]);
|
||||
float32x4_t vin1_opt = vdupq_n_f32(in1[0]);
|
||||
float32x4_t zeros = vdupq_n_f32(0.0f);
|
||||
float32x4_t bounds = vdupq_n_f32(6.0f);
|
||||
#endif
|
||||
int index = 0;
|
||||
if (param->in_elements_num0_ == 1) {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0_opt, vin1), zeros), bounds);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMIN(MSMAX(in0[0] - in1[index], 0), 6);
|
||||
}
|
||||
} else {
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0, vin1_opt), zeros), bounds);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMIN(MSMAX(in0[index] - in1[0], 0), 6);
|
||||
}
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementSub(const float *in0, const float *in1, float *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vsubq_f32(vin0, vin1);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[index] - in1[index];
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementSubInt(const int *in0, const int *in1, int *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
int32x4_t vin0 = vld1q_s32(in0 + index);
|
||||
int32x4_t vin1 = vld1q_s32(in1 + index);
|
||||
int32x4_t vout = vsubq_s32(vin0, vin1);
|
||||
vst1q_s32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = in0[index] - in1[index];
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementSubRelu(const float *in0, const float *in1, float *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t zeros = vdupq_n_f32(0.0f);
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vsubq_f32(vin0, vin1);
|
||||
vout = vbslq_f32(vcgtq_f32(vout, zeros), vout, zeros);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
float res = in0[index] - in1[index];
|
||||
out[index] = res > 0 ? res : 0;
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
int ElementSubRelu6(const float *in0, const float *in1, float *out, int size) {
|
||||
int index = 0;
|
||||
#ifdef ENABLE_NEON
|
||||
float32x4_t zeros = vdupq_n_f32(0.0f);
|
||||
float32x4_t bounds = vdupq_n_f32(6.0f);
|
||||
for (; index <= size - 4; index += C4NUM) {
|
||||
float32x4_t vin0 = vld1q_f32(in0 + index);
|
||||
float32x4_t vin1 = vld1q_f32(in1 + index);
|
||||
float32x4_t vout = vminq_f32(vmaxq_f32(vsubq_f32(vin0, vin1), zeros), bounds);
|
||||
vst1q_f32(out + index, vout);
|
||||
}
|
||||
#endif
|
||||
for (; index < size; index++) {
|
||||
out[index] = MSMIN(MSMAX(in0[index] - in1[index], 0), 6);
|
||||
}
|
||||
|
||||
return NNACL_OK;
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_NNACL_SUB_FP32_H_
|
||||
#define MINDSPORE_LITE_NNACL_SUB_FP32_H_
|
||||
|
||||
#ifdef ENABLE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/base/arithmetic_base.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Element-wise subtract: out[i] = in0[i] - in1[i] for i in [0, size). Returns NNACL_OK. */
int ElementSub(const float *in0, const float *in1, float *out, int size);
|
||||
/* Element-wise int subtract: out[i] = in0[i] - in1[i] for i in [0, size). Returns NNACL_OK. */
int ElementSubInt(const int *in0, const int *in1, int *out, int size);
|
||||
/* Element-wise subtract + ReLU: differences that are not greater than zero are stored as 0. Returns NNACL_OK. */
int ElementSubRelu(const float *in0, const float *in1, float *out, int size);
|
||||
/* Element-wise subtract + ReLU6: out[i] = min(max(in0[i] - in1[i], 0), 6). Returns NNACL_OK. */
int ElementSubRelu6(const float *in0, const float *in1, float *out, int size);
|
||||
/* Broadcast subtract: in0[0] - in1[i] when param->in_elements_num0_ == 1, else in0[i] - in1[0]. Returns NNACL_OK. */
int ElementOptSub(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/* Broadcast subtract (operand chosen by param->in_elements_num0_ == 1) followed by max(result, 0). Returns NNACL_OK. */
int ElementOptSubRelu(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/* Broadcast subtract (operand chosen by param->in_elements_num0_ == 1), result limited to [0, 6]. Returns NNACL_OK. */
int ElementOptSubRelu6(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
|
||||
/* Int broadcast subtract: in0[0] - in1[i] when param->in_elements_num0_ == 1, else in0[i] - in1[0]. Returns NNACL_OK. */
int ElementOptSubInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_SUB_FP32_H_
|
Loading…
Reference in new issue