diff --git a/mindspore/lite/nnacl/assembly/arm32/ConvDw3x3Int8BorderPixel.S b/mindspore/lite/nnacl/assembly/arm32/ConvDw3x3Int8BorderPixel.S index c5732590d8..fc9bc19abf 100644 --- a/mindspore/lite/nnacl/assembly/arm32/ConvDw3x3Int8BorderPixel.S +++ b/mindspore/lite/nnacl/assembly/arm32/ConvDw3x3Int8BorderPixel.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Int8BorderPixel -#ifndef __APPLE__ -.type ConvDw3x3Int8BorderPixel, %function -#endif // void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t height, // size_t width, size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, @@ -116,4 +111,3 @@ asm_function ConvDw3x3Int8BorderPixel vpop {q4-q7} pop {r4-r8, r9-r12, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Border.S b/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Border.S index 9f9d53cfc3..0de05fa75c 100644 --- a/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Border.S +++ b/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Border.S @@ -3,10 +3,6 @@ .text .align 5 -.global ConvDwFp32Border -#ifndef __APPLE__ -.type ConvDwFp32Border, %function -#endif // void ConvDwFp32Border(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width, // size_t in_kh_step, size_t in_kw_step, size_t kernel_w, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Center.S b/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Center.S index ffcee6f380..4bd90cad29 100644 --- a/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Center.S +++ b/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Center.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp32Center -#ifndef __APPLE__ -.type ConvDwFp32Center, %function -#endif // void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t in_sw_step, @@ -164,4 +159,3 @@ LoopWEnd: vpop {q4-q7} pop {r0-r8, r10, r11, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Row.S b/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Row.S index 30a8693dcb..d66a780b73 100644 --- a/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Row.S +++ b/mindspore/lite/nnacl/assembly/arm32/ConvDwFp32Row.S @@ -3,10 +3,6 @@ .text .align 5 -.global ConvDwFp32Row -#ifndef __APPLE__ -.type ConvDwFp32Row, %function -#endif // voidConvDwFp32Row(float* output_ptr, const float* input_ptr, const float* filter_ptr, // size_t num_pixels, size_t input_channel, size_t input_step) diff --git a/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8Center.S b/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8Center.S index 73d43abb45..10becb55a3 100644 --- a/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8Center.S +++ b/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8Center.S @@ -1,13 +1,9 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwInt8Center -#ifndef __APPLE__ -.type ConvDwInt8Center, %function -#endif + // void DepthwiseCenterInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, // int width, int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, // int in_sw_step, int in_kh_step, int in_kw_step, int8_t *in_zp, int32_t *out_zp, @@ -277,4 +273,3 @@ asm_function ConvDwInt8Center vpop {q4-q7} pop {r0-r8, r10, r11, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8PostAlign4.S b/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8PostAlign4.S index 3367ab390f..ca3a0624f5 100644 --- a/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8PostAlign4.S +++ b/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8PostAlign4.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwInt8PostAlign4 -#ifndef __APPLE__ -.type ConvDwInt8PostAlign4, %function -#endif // void ConvDwInt8PostAlign4(int8_t *dst, int32_t *buffer, int num_pixels, int32_t output_zp, int32_t out_multiplier, // int32_t left_shift, int32_t right_shift, int32_t acc_min, int32_t acc_max); @@ -108,4 +103,3 @@ asm_function ConvDwInt8PostAlign4 bx lr #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8PostAlign4PerChannel.S b/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8PostAlign4PerChannel.S index 270c959ee8..b595820fd7 100644 --- a/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8PostAlign4PerChannel.S +++ b/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8PostAlign4PerChannel.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwInt8PostAlign4PerChannel -#ifndef __APPLE__ -.type ConvDwInt8PostAlign4PerChannel, %function -#endif // void ConvDwInt8PostAlign4PerChannel(int8_t *dst, int32_t *buffer, int channel4, int32_t output_zp, int32_t *out_multiplier, // int32_t *left_shift, int32_t *right_shift, int32_t acc_min, int32_t acc_max); @@ -111,4 +106,3 @@ asm_function ConvDwInt8PostAlign4PerChannel bx lr #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8Row.S b/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8Row.S index 48ddccfc4f..bd577ae4bf 100644 --- a/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8Row.S +++ b/mindspore/lite/nnacl/assembly/arm32/ConvDwInt8Row.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwInt8Row -#ifndef __APPLE__ -.type ConvDwInt8Row, %function -#endif // void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *weight_ptr, int num_pixels, // int output_channel, int input_step, int8_t input_zp) @@ -132,4 +127,3 @@ asm_function ConvDwInt8Row vpop {q4-q7} pop {r4-r8, r9-r12, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/DeconvDwFp32Center.S b/mindspore/lite/nnacl/assembly/arm32/DeconvDwFp32Center.S index d0244746bc..7b89a4cb72 100644 --- a/mindspore/lite/nnacl/assembly/arm32/DeconvDwFp32Center.S +++ b/mindspore/lite/nnacl/assembly/arm32/DeconvDwFp32Center.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwFp32Center -#ifndef __APPLE__ -.type DeconvDwFp32Center, %function -#endif // void DeconvDwFp32Center(float *dst, const float *src, const float *weight, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, @@ -67,4 +62,3 @@ asm_function DeconvDwFp32Center pop {r0-r8, r10, r11, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Center.S b/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Center.S index 5db46b7a35..0394607238 100644 --- a/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Center.S +++ b/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Center.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwInt8Center -#ifndef __APPLE__ -.type DeconvDwInt8Center, %function -#endif // void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, @@ -67,4 +62,3 @@ asm_function DeconvDwInt8Center pop {r0-r8, r10, r11, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Post.S b/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Post.S index 3722126b9a..3c1da6eebb 100644 --- a/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Post.S +++ b/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Post.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwInt8Post -#ifndef __APPLE__ -.type DeconvDwInt8Post, %function -#endif // void DeconvDwInt8Post(int8_t *dst, int32_t *output_buffer, const int32_t *bias, int block_channel, int pixel_nums, // int out_multiplier, int left_shift, int right_shift, int32_t out_zp, int32_t acc_min, @@ -72,4 +67,3 @@ asm_function DeconvDwInt8Post bx lr #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/IndirectGemmInt16to32_8x4.S b/mindspore/lite/nnacl/assembly/arm32/IndirectGemmInt16to32_8x4.S index f8abe1c7a7..8616dcb6cc 100644 --- a/mindspore/lite/nnacl/assembly/arm32/IndirectGemmInt16to32_8x4.S +++ b/mindspore/lite/nnacl/assembly/arm32/IndirectGemmInt16to32_8x4.S @@ -3,10 +3,6 @@ .text .align 5 -.global IndirectGemmInt16to32_8x4 -#ifndef __APPLE__ -.type IndirectGemmInt16to32_8x4, %function -#endif // void IndirectGemmInt16to32_8x4(int *output, short *input, short *weight, size_t kszie, size_t ic8, size_t oc4, size_t offset); // r0: output, r1: input, r2: weight, r3: kszie, r4: ic8, r5: oc4, r6: offset diff --git a/mindspore/lite/nnacl/assembly/arm32/IndirectGemmInt8_2x4.S b/mindspore/lite/nnacl/assembly/arm32/IndirectGemmInt8_2x4.S index caea16f738..0dcf70fd58 100644 --- a/mindspore/lite/nnacl/assembly/arm32/IndirectGemmInt8_2x4.S +++ b/mindspore/lite/nnacl/assembly/arm32/IndirectGemmInt8_2x4.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global IndirectGemmInt8_2x4 -#ifndef __APPLE__ -.type IndirectGemmInt8_2x4, %function -#endif // void IndirectGemmInt8_2x4(int8_t *output, int8_t *input, int8_t *weight, int32_t *bias, size_t ksize, size_t ic4, // size_t oc, size_t offset, int32_t *input_sum, size_t act_min, size_t act_max, size_t out_zp, int32_t *out_multiplier, @@ -294,4 +289,3 @@ LoopOcEnd: vpop {q4-q7} pop {r4-r8, r10, r11, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/MatVecMulFp32.S b/mindspore/lite/nnacl/assembly/arm32/MatVecMulFp32.S index a99b331218..fd58cbf6a6 100644 --- a/mindspore/lite/nnacl/assembly/arm32/MatVecMulFp32.S +++ b/mindspore/lite/nnacl/assembly/arm32/MatVecMulFp32.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global MatVecMulFp32 -#ifndef __APPLE__ -.type MatVecMulFp32, %function -#endif // void MatVecMulFp32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int col) // r0: a @@ -183,4 +178,3 @@ End: sub sp, sp, #52 pop {r0-r8, r9, r10, r11, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/MatmulFp32.S b/mindspore/lite/nnacl/assembly/arm32/MatmulFp32.S index 7ad42d5df8..f92ed6fb63 100644 --- a/mindspore/lite/nnacl/assembly/arm32/MatmulFp32.S +++ b/mindspore/lite/nnacl/assembly/arm32/MatmulFp32.S @@ -1,11 +1,8 @@ #ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulFloatNeon32 -#ifndef __APPLE__ - .type MatmulFloatNeon32, %function -#endif + +.text +.align 5 // void MatmulFloatNeon32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth // int row, int col, size_t stride, size_t writeNhwc, size_t WriteWino) diff --git a/mindspore/lite/nnacl/assembly/arm32/MatmulFp32Opt.S b/mindspore/lite/nnacl/assembly/arm32/MatmulFp32Opt.S index 4a13bc92aa..18b817508b 100644 --- a/mindspore/lite/nnacl/assembly/arm32/MatmulFp32Opt.S +++ b/mindspore/lite/nnacl/assembly/arm32/MatmulFp32Opt.S @@ -1,11 +1,8 @@ #ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulFloatNeon32Opt -#ifndef __APPLE__ - .type MatmulFloatNeon32Opt, %function -#endif + +.text +.align 5 // void MatmulFloatNeon32Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth // int row, int col, size_t stride, size_t writeMode) diff --git a/mindspore/lite/nnacl/assembly/arm32/MatmulFp32Opt12x4.S b/mindspore/lite/nnacl/assembly/arm32/MatmulFp32Opt12x4.S index fc6a2225a1..1b4ee259ca 100644 --- a/mindspore/lite/nnacl/assembly/arm32/MatmulFp32Opt12x4.S +++ b/mindspore/lite/nnacl/assembly/arm32/MatmulFp32Opt12x4.S @@ -1,11 +1,8 @@ #ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulFloatNeon32Opt12x4 -#ifndef __APPLE__ - .type MatmulFloatNeon32Opt12x4, %function -#endif + +.text +.align 5 // void MatmulFloatNeon32Opt12x4(const float *a, const float *b, float *c, const float *bias, int act_type, int depth // int row, int col, size_t stride, size_t writeMode) diff --git a/mindspore/lite/nnacl/assembly/arm32/MatmulInt8.S b/mindspore/lite/nnacl/assembly/arm32/MatmulInt8.S index 5d3e20fc29..c5028394af 100644 --- a/mindspore/lite/nnacl/assembly/arm32/MatmulInt8.S +++ b/mindspore/lite/nnacl/assembly/arm32/MatmulInt8.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global MatmulInt8Neon32 -#ifndef __APPLE__ -.type MatmulInt8Neon32, %function -#endif //void MatmulInt8Neon32(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16, // const int *input_sums, const int *weight_bias, int act_min, int act_max, int out_zp, @@ -286,4 +281,3 @@ End1: vpop {q4-q7} pop {r0-r11, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/MatmulInt8Opt.S b/mindspore/lite/nnacl/assembly/arm32/MatmulInt8Opt.S index 03c45a17d7..2fddb0167d 100644 --- a/mindspore/lite/nnacl/assembly/arm32/MatmulInt8Opt.S +++ b/mindspore/lite/nnacl/assembly/arm32/MatmulInt8Opt.S @@ -1,13 +1,8 @@ -#ifdef __arm__ -#ifndef __aarch64__ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global MatmulInt8Opt -#ifndef __APPLE__ -.type MatmulInt8Opt, %function -#endif //void MatmulInt8Neon32Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16, // const int *input_sums, const int *weight_bias, int act_min, int act_max, int out_zp, @@ -288,4 +283,3 @@ LoopRowEnd: vpop {q4-q7} pop {r0-r8, r10, r11, pc} #endif -#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/MatmulWinogradFp32.S b/mindspore/lite/nnacl/assembly/arm32/MatmulWinogradFp32.S index 8bc5533b9e..0871075455 100644 --- a/mindspore/lite/nnacl/assembly/arm32/MatmulWinogradFp32.S +++ b/mindspore/lite/nnacl/assembly/arm32/MatmulWinogradFp32.S @@ -3,10 +3,6 @@ .text .align 5 -.global MatrixMultiplyWinograd -#ifndef __APPLE__ -.type MatrixMultiplyWinograd, %function -#endif // MatrixMultiplyWinograd(float *matix_a, float *matrix_b, float *matrix_c, int m, int k, int n, int in_channel, int c4_channel) // r0: matrix_a, r1: matrix_b, r2: matrix_c, r3: m, r4: k, r5: n, r6: in_channel, r7: c4_channel * 4 diff --git a/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC4.S b/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC4.S index da9ea71f95..243ba532c7 100644 --- a/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC4.S +++ b/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC4.S @@ -1,12 +1,8 @@ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -//.p2align 5,,15 -.global PostFuncBiasReluC4 -#ifndef __APPLE__ -.type PostFuncBiasReluC4, %function -#endif asm_function PostFuncBiasReluC4 push {r4-r8, r10, r11, lr} @@ -234,3 +230,4 @@ Loop_C1_3_Write: End: sub sp, sp, #32 pop {r4-r8, r10, r11, pc} +#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC8.S b/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC8.S index 6716129c0e..da3bb87973 100644 --- a/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC8.S +++ b/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC8.S @@ -3,11 +3,6 @@ .text .align 5 -//.p2align 5,,15 -.global PostFuncBiasReluC8 -#ifndef __APPLE__ -.type PostFuncBiasReluC8, %function -#endif //void PostFuncBiasReluC8(float *dst, const float *src, const float *bias, size_t oc8div,size_t oc8mod // size_t plane_size, size_t stride, int relu_type); diff --git a/mindspore/lite/nnacl/assembly/arm32/PreSum4x16Int8Peroc.S b/mindspore/lite/nnacl/assembly/arm32/PreSum4x16Int8Peroc.S index e5f0629ed6..1433178faa 100644 --- a/mindspore/lite/nnacl/assembly/arm32/PreSum4x16Int8Peroc.S +++ b/mindspore/lite/nnacl/assembly/arm32/PreSum4x16Int8Peroc.S @@ -1,12 +1,8 @@ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global PreSum4x16Int8Peroc -#ifndef __APPLE__ -.type PreSum4x16Int8Peroc, %function -#endif - //void PreSum4x16Int8Peroc(const int8_t *src, int32_t *sum, int32_t *zp, size_t hw4, size_t ic16, int32_t oc_div2, // size_t oc_res2, size_t stride); @@ -129,3 +125,4 @@ End: sub sp, sp, #100 vpop {q4-q7} pop {r4-r11, pc} +#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/PreSum4x16Int8Pert.S b/mindspore/lite/nnacl/assembly/arm32/PreSum4x16Int8Pert.S index 15ebaa139d..cea87851be 100644 --- a/mindspore/lite/nnacl/assembly/arm32/PreSum4x16Int8Pert.S +++ b/mindspore/lite/nnacl/assembly/arm32/PreSum4x16Int8Pert.S @@ -1,12 +1,8 @@ +#ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" .text .align 5 -.global PreSum4x16Int8Pert -#ifndef __APPLE__ -.type PreSum4x16Int8Pert, %function -#endif - // void PreSum4x16Int8Pert(const int8_t *src, int32_t *sum, size_t row4, size_t col16, int32_t filter_zp); @@ -80,3 +76,4 @@ End: sub sp, sp, #96 vpop {q4-q7} pop {r4-r8, r10, r11, pc} +#endif diff --git a/mindspore/lite/nnacl/assembly/arm32/TiledC4MatmulFp32.S b/mindspore/lite/nnacl/assembly/arm32/TiledC4MatmulFp32.S index e7961e37d3..1ad0058909 100644 --- a/mindspore/lite/nnacl/assembly/arm32/TiledC4MatmulFp32.S +++ b/mindspore/lite/nnacl/assembly/arm32/TiledC4MatmulFp32.S @@ -1,11 +1,8 @@ #ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" - .text - .align 5 - .global TiledC4MatmulFp32 -#ifndef __APPLE__ - .type TiledC4MatmulFp32, %function -#endif + +.text +.align 5 asm_function TiledC4MatmulFp32 //void TiledC4MatmulFp32(float* dst, const float* src, const float* weight, size_t cal_num, size_t ic4, size_t oc4) diff --git a/mindspore/lite/nnacl/assembly/arm32/WinogradTransLeft.S b/mindspore/lite/nnacl/assembly/arm32/WinogradTransLeft.S index 8ea2bc70d1..5737ca9370 100644 --- a/mindspore/lite/nnacl/assembly/arm32/WinogradTransLeft.S +++ b/mindspore/lite/nnacl/assembly/arm32/WinogradTransLeft.S @@ -1,12 +1,8 @@ #ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" - .text - .align 5 - .global WinogradTransLeft -#ifndef __APPLE__ - .type WinogradTransLeft, %function -#endif +.text +.align 5 //void WinogradTransLeft(const float* S, const float* B, float* M, size_t w, size_t h, size_t k, size_t length); //x0: S diff --git a/mindspore/lite/nnacl/assembly/arm32/WinogradTransRight.S b/mindspore/lite/nnacl/assembly/arm32/WinogradTransRight.S index 0b1c8f9a12..adf01f0ad5 100644 --- a/mindspore/lite/nnacl/assembly/arm32/WinogradTransRight.S +++ b/mindspore/lite/nnacl/assembly/arm32/WinogradTransRight.S @@ -1,12 +1,8 @@ #ifdef ENABLE_ARM32 #include "nnacl/assembly_global.h" - .text - .align 5 - .global WinogradTransRight -#ifndef __APPLE__ - .type WinogradTransRight, %function -#endif +.text +.align 5 //void WinogradTransRight(const float* S, const float* B, float* M, size_t w, size_t h, size_t k, size_t length); //x0: S diff --git a/mindspore/lite/nnacl/assembly/arm64/AdderFp32.S b/mindspore/lite/nnacl/assembly/arm64/AdderFp32.S index 621fd6eeac..5c082959dc 100644 --- a/mindspore/lite/nnacl/assembly/arm64/AdderFp32.S +++ b/mindspore/lite/nnacl/assembly/arm64/AdderFp32.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global AdderFloatNeon64 -#ifndef __APPLE__ - .type AdderFloatNeon64, %function -#endif + +.text +.align 5 // void AdderFloatNeon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth // int row, int col, size_t stride) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Corner.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Corner.S index d7b04b15bf..522454977c 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Corner.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Corner.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Corner -#ifndef __APPLE__ -.type ConvDw3x3Corner, %function -#endif // void ConvDw3x3Corner(float *dst, const float *src, const float *weight, const float *bias, int in_kh_step, // int in_kw_step, int channel, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Horizontal.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Horizontal.S index b28b7ab557..d7263816c3 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Horizontal.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Horizontal.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Horizontal -#ifndef __APPLE__ -.type ConvDw3x3Horizontal, %function -#endif // void ConvDw3x3Horizontal(float *dst, const float *src, const float *weight, const float *bias, int in_kh_step, // int in_kw_step, int channel, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Stride1.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Stride1.S index b28fc16704..8c27890ec1 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Stride1.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Stride1.S @@ -1,13 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Stride1 -#ifndef __APPLE__ -.type ConvDw3x3Stride1, %function -#endif - // void ConvDw3x3Stride1(float *output, const float *buffer, const float *weight, const float *bias, int col_size, // int row_size, int channel, int output_h, int output_w, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Stride2.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Stride2.S index e77f60fd09..67e36d572a 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Stride2.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Stride2.S @@ -1,13 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Stride2 -#ifndef __APPLE__ -.type ConvDw3x3Stride2, %function -#endif - // void ConvDw3x3Stride2(float *output, const float *buffer, const float *weight, const float *bias, int col_size, // int row_size, int channel, int output_h, int output_w, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Vertical.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Vertical.S index b1f8de19f7..f3d7053dcb 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Vertical.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Fp32Vertical.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Vertical -#ifndef __APPLE__ -.type ConvDw3x3Vertical, %function -#endif // void ConvDw3x3Vertical(float *dst, const float *src, const float *weight, const float *bias, int in_kh_step, // int in_kw_step, int channel, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8.S index 391401e88f..4783bd8d3f 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8.S @@ -1,13 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Int8Neon64 -#ifndef __APPLE__ -.type ConvDw3x3Int8Neon64, %function -#endif - // void ConvDw3x3Int8Neon64(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias, int input_col_size, // int input_row_size, int channel, int output_h, int output_w, int8_t in_zp, int32_t out_zp, diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Corner.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Corner.S index 7ffdf0fd6f..3e497f8a4b 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Corner.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Corner.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Int8Corner -#ifndef __APPLE__ -.type ConvDw3x3Int8Corner, %function -#endif // void ConvDw3x3Int8Corner(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t in_kh_step, // size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, int32_t *out_multiplier, diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Horizontal.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Horizontal.S index 5c1b11c919..1a93788c81 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Horizontal.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Horizontal.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Int8Horizontal -#ifndef __APPLE__ -.type ConvDw3x3Int8Horizontal, %function -#endif // void ConvDw3x3Int8Horizontal(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t in_kh_step, // size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, int32_t *out_multiplier, diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Stride2.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Stride2.S index 2162ade6bb..73d9d0d3ca 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Stride2.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Stride2.S @@ -1,13 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Int8Stride2 -#ifndef __APPLE__ -.type ConvDw3x3Int8Stride2, %function -#endif - // void ConvDw3x3Int8Stride2(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias, int input_col_size, // int input_row_size, int channel, int output_h, int output_w, int8_t in_zp, int32_t out_zp, diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Vertical.S b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Vertical.S index 825aa583d8..51e8ae076f 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Vertical.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDw3x3Int8Vertical.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDw3x3Int8Vertical -#ifndef __APPLE__ -.type ConvDw3x3Int8Vertical, %function -#endif // void ConvDw3x3Int8Vertical(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t in_kh_step, // size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp, int32_t *out_multiplier, diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Border.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Border.S index f3ce920f5d..50416389a8 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Border.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Border.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp32Border -#ifndef __APPLE__ -.type ConvDwFp32Border, %function -#endif // void ConvDwFp32Border(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width, // size_t in_kh_step, size_t in_kw_step, size_t kernel_w, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Center.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Center.S index d4e6be641e..3c04494b6c 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Center.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Center.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp32Center -#ifndef __APPLE__ -.type ConvDwFp32Center, %function -#endif // void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t in_sw_step, diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Indirect3x3.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Indirect3x3.S index 246d8bfab4..cb3d7eb02e 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Indirect3x3.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Indirect3x3.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp32Indirect3x3 -#ifndef __APPLE__ -.type ConvDwFp32Indirect3x3, %function -#endif // void ConvDwFp32Indirect3x3(float *output, float **input, const float *weights, const float *bias, int channels, int output_width, // size_t input_stride, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Indirect5x5.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Indirect5x5.S index 6ff7307f78..bc9e9ba99f 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Indirect5x5.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Indirect5x5.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp32Indirect5x5 -#ifndef __APPLE__ -.type ConvDwFp32Indirect5x5, %function -#endif // void ConvDwFp32Indirect5x5(float *output, float **input, const float *weights, const float *bias, int channels, int output_width, // size_t input_stride, size_t relu, size_t relu6) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Row.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Row.S index 1f5c76df3d..53d084641a 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Row.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwFp32Row.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp32Row -#ifndef __APPLE__ -.type ConvDwFp32Row, %function -#endif // void ConvDwFp32Row(float* output_ptr, const float* input_ptr,const float* filter_ptr, // size_t num_pixels, size_t input_channel, size_t input_step) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Center.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Center.S index 017732e7ca..4b9175469b 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Center.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Center.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwInt8Center -#ifndef __APPLE__ -.type ConvDwInt8Center, %function -#endif // void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t height, // size_t width, size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8PostAlign4.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8PostAlign4.S index 2f8ee9d1dc..1f626a50fc 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8PostAlign4.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8PostAlign4.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwInt8PostAlign4 -#ifndef __APPLE__ -.type ConvDwInt8PostAlign4, %function -#endif // void ConvDwInt8PostAlign4(int8_t *dst, int32_t *buffer, int num_pixels, int32_t output_zp, int32_t out_multiplier, // int32_t left_shift, int32_t right_shift, int32_t acc_min, int32_t acc_max); diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8PostAlign4PerChannel.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8PostAlign4PerChannel.S index b56fd6a34b..750e302748 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8PostAlign4PerChannel.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8PostAlign4PerChannel.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwInt8PostAlign4PerChannel -#ifndef __APPLE__ -.type ConvDwInt8PostAlign4PerChannel, %function -#endif // void ConvDwInt8PostAlign4PerChannel(int8_t *dst, int32_t *buffer, int channel4, int32_t output_zp, int32_t *out_multiplier, // int32_t *left_shift, int32_t *right_shift, int32_t acc_min, int32_t acc_max); diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Row.S b/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Row.S index c15d860863..59218f8af9 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Row.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Row.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwInt8Row -#ifndef __APPLE__ -.type ConvDwInt8Row, %function -#endif // void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *weight_ptr, int num_pixels, // int output_channel, int input_step, int8_t input_zp) diff --git a/mindspore/lite/nnacl/assembly/arm64/ConvFp32Center.S b/mindspore/lite/nnacl/assembly/arm64/ConvFp32Center.S index 277f3ebd10..a73f6589f1 100644 --- a/mindspore/lite/nnacl/assembly/arm64/ConvFp32Center.S +++ b/mindspore/lite/nnacl/assembly/arm64/ConvFp32Center.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvSwFp32Center -#ifndef __APPLE__ -.type ConvSwFp32Center, %function -#endif // void ConvSwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t ic4, size_t in_sh_step, diff --git a/mindspore/lite/nnacl/assembly/arm64/DeconvDwFp32Border.S b/mindspore/lite/nnacl/assembly/arm64/DeconvDwFp32Border.S index 31b186b8d2..29bc336834 100644 --- a/mindspore/lite/nnacl/assembly/arm64/DeconvDwFp32Border.S +++ b/mindspore/lite/nnacl/assembly/arm64/DeconvDwFp32Border.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwFp32Border -#ifndef __APPLE__ -.type DeconvDwFp32Border, %function -#endif // void DeconvDwFp32Border(float *dst, const float *src, const float *weight, size_t height, size_t width, // size_t in_kh_step, size_t in_kw_step, size_t kernel_w) diff --git a/mindspore/lite/nnacl/assembly/arm64/DeconvDwFp32Center.S b/mindspore/lite/nnacl/assembly/arm64/DeconvDwFp32Center.S index d4c49827d2..d99a4c9bac 100644 --- a/mindspore/lite/nnacl/assembly/arm64/DeconvDwFp32Center.S +++ b/mindspore/lite/nnacl/assembly/arm64/DeconvDwFp32Center.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwFp32Center -#ifndef __APPLE__ -.type DeconvDwFp32Center, %function -#endif // void DeconvDwFp32Center(float *dst, const float *src, const float *weight, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t in_sw_step, diff --git a/mindspore/lite/nnacl/assembly/arm64/DeconvDwInt8Center.S b/mindspore/lite/nnacl/assembly/arm64/DeconvDwInt8Center.S index 8a69813657..3b48603a7e 100644 --- a/mindspore/lite/nnacl/assembly/arm64/DeconvDwInt8Center.S +++ b/mindspore/lite/nnacl/assembly/arm64/DeconvDwInt8Center.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwInt8Center -#ifndef __APPLE__ -.type DeconvDwInt8Center, %function -#endif // void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t in_sw_step, diff --git a/mindspore/lite/nnacl/assembly/arm64/DeconvDwInt8Post.S b/mindspore/lite/nnacl/assembly/arm64/DeconvDwInt8Post.S index ad3ba50ca5..e3da39ef0a 100644 --- a/mindspore/lite/nnacl/assembly/arm64/DeconvDwInt8Post.S +++ b/mindspore/lite/nnacl/assembly/arm64/DeconvDwInt8Post.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwInt8Post -#ifndef __APPLE__ -.type DeconvDwInt8Post, %function -#endif // void DeconvDwInt8Post(int8_t *dst, int32_t *output_buffer, const int32_t *bias, int block_channel, int pixel_nums, // int out_multiplier, int left_shift, int right_shift, int32_t out_zp, int32_t acc_min, diff --git a/mindspore/lite/nnacl/assembly/arm64/IndirectGemmInt16to32_8x4.S b/mindspore/lite/nnacl/assembly/arm64/IndirectGemmInt16to32_8x4.S index 5e63493241..f4a6ed58e3 100644 --- a/mindspore/lite/nnacl/assembly/arm64/IndirectGemmInt16to32_8x4.S +++ b/mindspore/lite/nnacl/assembly/arm64/IndirectGemmInt16to32_8x4.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global IndirectGemmInt16to32_8x4 -#ifndef __APPLE__ -.type IndirectGemmInt16to32_8x4, %function -#endif // void IndirectGemmInt16to32_8x4(int *output, short *input, short *weight, size_t ksize, size_t ic8, size_t oc4, size_t offset); // x0: output, x1: input, x2: weight, x3: ksize, x4: ic8, x5: oc4, x6: offset diff --git a/mindspore/lite/nnacl/assembly/arm64/MatVecMulFp32.S b/mindspore/lite/nnacl/assembly/arm64/MatVecMulFp32.S index 88824e8aed..d77826108f 100644 --- a/mindspore/lite/nnacl/assembly/arm64/MatVecMulFp32.S +++ b/mindspore/lite/nnacl/assembly/arm64/MatVecMulFp32.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatVecMulFp32 -#ifndef __APPLE__ - .type MatVecMulFp32, %function -#endif + +.text +.align 5 // void MatVecMulFp32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int col) // x0: a diff --git a/mindspore/lite/nnacl/assembly/arm64/MatmulFp32.S b/mindspore/lite/nnacl/assembly/arm64/MatmulFp32.S index 47aaeb121e..36029c35ca 100644 --- a/mindspore/lite/nnacl/assembly/arm64/MatmulFp32.S +++ b/mindspore/lite/nnacl/assembly/arm64/MatmulFp32.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulFloatNeon64 -#ifndef __APPLE__ - .type MatmulFloatNeon64, %function -#endif + +.text +.align 5 // void MatmulFloatNeon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth // int row, int col, size_t stride, size_t writeNhwc, size_t WriteWino) diff --git a/mindspore/lite/nnacl/assembly/arm64/MatmulFp32Opt.S b/mindspore/lite/nnacl/assembly/arm64/MatmulFp32Opt.S index 07a87a8e81..62880ea15a 100644 --- a/mindspore/lite/nnacl/assembly/arm64/MatmulFp32Opt.S +++ b/mindspore/lite/nnacl/assembly/arm64/MatmulFp32Opt.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulFloatNeon64Opt -#ifndef __APPLE__ - .type MatmulFloatNeon64Opt, %function -#endif + +.text +.align 5 // void MatmulFloatNeon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth // int row, int col, size_t stride, size_t writeMode) diff --git a/mindspore/lite/nnacl/assembly/arm64/MatmulInt8.S b/mindspore/lite/nnacl/assembly/arm64/MatmulInt8.S index 600f122e16..8e9de330a0 100644 --- a/mindspore/lite/nnacl/assembly/arm64/MatmulInt8.S +++ b/mindspore/lite/nnacl/assembly/arm64/MatmulInt8.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulInt8Neon64 -#ifndef __APPLE__ - .type MatmulInt8Neon64, %function -#endif + +.text +.align 5 //void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16, const int *a_sums, // const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift, diff --git a/mindspore/lite/nnacl/assembly/arm64/MatmulInt8Opt.S b/mindspore/lite/nnacl/assembly/arm64/MatmulInt8Opt.S index fd31cc0f9c..f5ecb8e8ac 100644 --- a/mindspore/lite/nnacl/assembly/arm64/MatmulInt8Opt.S +++ b/mindspore/lite/nnacl/assembly/arm64/MatmulInt8Opt.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulInt8Opt -#ifndef __APPLE__ - .type MatmulInt8Opt, %function -#endif + +.text +.align 5 //void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16, const int *a_sums, // const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift, diff --git a/mindspore/lite/nnacl/assembly/arm64/MatmulR4Int8.S b/mindspore/lite/nnacl/assembly/arm64/MatmulR4Int8.S index 98426e2120..fd86c5500a 100644 --- a/mindspore/lite/nnacl/assembly/arm64/MatmulR4Int8.S +++ b/mindspore/lite/nnacl/assembly/arm64/MatmulR4Int8.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatMulR4Int8Neon64 -#ifndef __APPLE__ - .type MatMulR4Int8Neon64, %function -#endif + +.text +.align 5 //void MatMulR4Int8Neon64(const int8_t *a, const int8_t *b, int32_t *dst, int row4, int col4, int deep16, // const int *input_sum, const int *bias) diff --git a/mindspore/lite/nnacl/assembly/arm64/MatmulWinogradFp32.S b/mindspore/lite/nnacl/assembly/arm64/MatmulWinogradFp32.S index 182e7f85ab..532fef0e94 100644 --- a/mindspore/lite/nnacl/assembly/arm64/MatmulWinogradFp32.S +++ b/mindspore/lite/nnacl/assembly/arm64/MatmulWinogradFp32.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global MatrixMultiplyWinograd -#ifndef __APPLE__ -.type MatrixMultiplyWinograd, %function -#endif // MatrixMultiplyWinograd(float *matix_a, float *matrix_b, float *matrix_c, int m, int k, int n, int in_channel, int c4_channel) // x0: matrix_a, x1: matrix_b, x2: matrix_c, x3: m, x4: k, x5: n, x6: in_channel, x7: c4_channel diff --git a/mindspore/lite/nnacl/assembly/arm64/PostFuncBiasReluC4.S b/mindspore/lite/nnacl/assembly/arm64/PostFuncBiasReluC4.S index 63794dd4d1..732ba026be 100644 --- a/mindspore/lite/nnacl/assembly/arm64/PostFuncBiasReluC4.S +++ b/mindspore/lite/nnacl/assembly/arm64/PostFuncBiasReluC4.S @@ -1,13 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - //.p2align 5,,15 - .global PostFuncBiasReluC4 -#ifndef __APPLE__ - .type PostFuncBiasReluC4, %function -#endif +.text +.align 5 //void PostFuncBiasReluC4(float *dst, const float *src, const float *bias, size_t oc4div, size_t oc4mod, // size_t plane_size, size_t plane_stride, size_t relu_type); diff --git a/mindspore/lite/nnacl/assembly/arm64/PostFuncBiasReluC8.S b/mindspore/lite/nnacl/assembly/arm64/PostFuncBiasReluC8.S index 05bde14ccf..8da24fed4c 100644 --- a/mindspore/lite/nnacl/assembly/arm64/PostFuncBiasReluC8.S +++ b/mindspore/lite/nnacl/assembly/arm64/PostFuncBiasReluC8.S @@ -1,13 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - //.p2align 5,,15 - .global PostFuncBiasReluC8 -#ifndef __APPLE__ - .type PostFuncBiasReluC8, %function -#endif +.text +.align 5 //void PostFuncBiasReluC8(float *dst, const float *src, const float *bias, size_t oc8div,size_t oc8mod // size_t plane_size, size_t stride, int relu_type); diff --git a/mindspore/lite/nnacl/assembly/arm64/PostFuncInt8C4Neon64.S b/mindspore/lite/nnacl/assembly/arm64/PostFuncInt8C4Neon64.S index 270c1aefc1..70811c0c39 100644 --- a/mindspore/lite/nnacl/assembly/arm64/PostFuncInt8C4Neon64.S +++ b/mindspore/lite/nnacl/assembly/arm64/PostFuncInt8C4Neon64.S @@ -1,14 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - //.p2align 5,,15 - .global PostFuncInt8C4Neon64 -#ifndef __APPLE__ - .type PostFuncInt8C4Neon64, %function -#endif - +.text +.align 5 //void PostFuncInt8C4Neon64(const int32_t *in, const int32_t *bias, int8_t *out, size_t oc4div, size_t oc4res, // size_t plane, size_t stride, int32_t multiplier, int32_t left_shift, int32_t right_shift, diff --git a/mindspore/lite/nnacl/assembly/arm64/PreSum4x16Int8Peroc.S b/mindspore/lite/nnacl/assembly/arm64/PreSum4x16Int8Peroc.S index eb62903d91..427a3f1372 100644 --- a/mindspore/lite/nnacl/assembly/arm64/PreSum4x16Int8Peroc.S +++ b/mindspore/lite/nnacl/assembly/arm64/PreSum4x16Int8Peroc.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - //.p2align 5,,15 - .global PreSum4x16Int8Peroc -#ifndef __APPLE__ - .type PreSum4x16Int8Peroc, %function -#endif + +.text +.align 5 //void PreSum4x16Int8Peroc(const int8_t *src, int32_t *sum, int32_t *zp, size_t hw4, size_t ic16, int32_t oc_div4, // size_t oc_res4, size_t stride); diff --git a/mindspore/lite/nnacl/assembly/arm64/PreSum4x16Int8Pert.S b/mindspore/lite/nnacl/assembly/arm64/PreSum4x16Int8Pert.S index af9d4b4061..7fb42396ec 100644 --- a/mindspore/lite/nnacl/assembly/arm64/PreSum4x16Int8Pert.S +++ b/mindspore/lite/nnacl/assembly/arm64/PreSum4x16Int8Pert.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - //.p2align 5,,15 - .global PreSum4x16Int8Pert -#ifndef __APPLE__ - .type PreSum4x16Int8Pert, %function -#endif + +.text +.align 5 // void PreSum4x16Int8Pert(const int8_t *src, int32_t *dst, size_t row4, size_t col16, int32_t filter_zp); diff --git a/mindspore/lite/nnacl/assembly/arm64/TiledC4MatmulFp32.S b/mindspore/lite/nnacl/assembly/arm64/TiledC4MatmulFp32.S index 5e931e73b1..1e49e0d0ad 100644 --- a/mindspore/lite/nnacl/assembly/arm64/TiledC4MatmulFp32.S +++ b/mindspore/lite/nnacl/assembly/arm64/TiledC4MatmulFp32.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global TiledC4MatmulFp32 -#ifndef __APPLE__ - .type TiledC4MatmulFp32, %function -#endif +.text +.align 5 asm_function TiledC4MatmulFp32 //void TiledC4MatmulFp32(float* dst, const float* src, const float* weight, size_t ic4, size_t cal_num, size_t oc4) diff --git a/mindspore/lite/nnacl/assembly/arm64/WinogradTransLeft.S b/mindspore/lite/nnacl/assembly/arm64/WinogradTransLeft.S index e469642058..0f8927d3be 100644 --- a/mindspore/lite/nnacl/assembly/arm64/WinogradTransLeft.S +++ b/mindspore/lite/nnacl/assembly/arm64/WinogradTransLeft.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global WinogradTransLeft -#ifndef __APPLE__ - .type WinogradTransLeft, %function -#endif +.text +.align 5 asm_function WinogradTransLeft //void WinogradTransLeft(const float* S, const float* B, float* M, size_t w, size_t h, size_t k, size_t length); diff --git a/mindspore/lite/nnacl/assembly/arm64/WinogradTransRight.S b/mindspore/lite/nnacl/assembly/arm64/WinogradTransRight.S index a413cf5c01..d29ef52a08 100644 --- a/mindspore/lite/nnacl/assembly/arm64/WinogradTransRight.S +++ b/mindspore/lite/nnacl/assembly/arm64/WinogradTransRight.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global WinogradTransRight -#ifndef __APPLE__ - .type WinogradTransRight, %function -#endif +.text +.align 5 asm_function WinogradTransRight //void WinogradTransRight(const float* S, const float* B, float* M, size_t w, size_t h, size_t k, size_t length); diff --git a/mindspore/lite/nnacl/assembly/avx/ConvDwFp32Avx3x3.S b/mindspore/lite/nnacl/assembly/avx/ConvDwFp32Avx3x3.S index a55642d6c7..a5201893eb 100644 --- a/mindspore/lite/nnacl/assembly/avx/ConvDwFp32Avx3x3.S +++ b/mindspore/lite/nnacl/assembly/avx/ConvDwFp32Avx3x3.S @@ -2,12 +2,6 @@ #include "nnacl/assembly_global.h" .text .align 4 -.global ConvDwFp32Avx3x3 -#ifndef __APPLE__ -#ifndef WIN32 -.type ConvDwFp32Avx3x3, %function -#endif -#endif // void ConvDwFp32Avx3x3(float *output, float **input, const float *weights, const float *bias, size_t channels, size_t output_width, // size_t input_stride, size_t relum, szie_t relu6) diff --git a/mindspore/lite/nnacl/assembly/avx/ConvDwFp32BorderAvx.S b/mindspore/lite/nnacl/assembly/avx/ConvDwFp32BorderAvx.S index 5d3bd03c06..de240b4bfe 100644 --- a/mindspore/lite/nnacl/assembly/avx/ConvDwFp32BorderAvx.S +++ b/mindspore/lite/nnacl/assembly/avx/ConvDwFp32BorderAvx.S @@ -1,18 +1,14 @@ #ifdef ENABLE_AVX - .text - .align 4 - .global ConvDwFp32Border -#ifndef __APPLE__ -#ifndef WIN32 - .type ConvDwFp32Border, %function -#endif -#endif +#include "nnacl/assembly_global.h" + +.text +.align 4 // void ConvDwFp32Border(float *dst, const float *src, const float *weight, const float *bias, size_t height, // size_t width, size_t in_kh_step, size_t in_kw_step, size_t kernel_w, size_t relu, // size_t relu6); -ConvDwFp32Border: +asm_function ConvDwFp32Border pushq %r15 pushq %r14 pushq %r13 diff --git a/mindspore/lite/nnacl/assembly/avx/ConvDwFp32RowAvx.S b/mindspore/lite/nnacl/assembly/avx/ConvDwFp32RowAvx.S index 6896b78a68..c77b05c570 100644 --- a/mindspore/lite/nnacl/assembly/avx/ConvDwFp32RowAvx.S +++ b/mindspore/lite/nnacl/assembly/avx/ConvDwFp32RowAvx.S @@ -1,12 +1,8 @@ #ifdef ENABLE_AVX - .text - .align 4 - .global ConvDwFp32Row -#ifndef __APPLE__ -#ifndef WIN32 - .type ConvDwFp32Row, %function -#endif -#endif +#include "nnacl/assembly_global.h" + +.text +.align 4 // void ConvDwFp32Row(float *output_ptr, const float *input_tmp, const float *weight_ptr, size_t num_pixels, // size_t output_channel, size_t input_step); @@ -26,7 +22,7 @@ // 40: output_channel // 48: input_step -ConvDwFp32Row: +asm_function ConvDwFp32Row pushq %r15 pushq %r14 pushq %r13 diff --git a/mindspore/lite/nnacl/assembly/avx/MatmulAvx.S b/mindspore/lite/nnacl/assembly/avx/MatmulAvx.S index 643c1b3d0e..904e903132 100644 --- a/mindspore/lite/nnacl/assembly/avx/MatmulAvx.S +++ b/mindspore/lite/nnacl/assembly/avx/MatmulAvx.S @@ -1,13 +1,8 @@ #ifdef ENABLE_AVX #include "nnacl/assembly_global.h" - .text - .align 4 - .global MatmulFloatAvxOpt -#ifndef __APPLE__ -#ifndef WIN32 - .type MatmulFloatAvxOpt, %function -#endif -#endif + +.text +.align 4 // void MatmulFloatAvxOpt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth // int row, int col, size_t stride, size_t writeMode) diff --git a/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Border.S b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Border.S index dc0e98bad1..0491ad0a67 100644 --- a/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Border.S +++ b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Border.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp16Border -#ifndef __APPLE__ -.type ConvDwFp16Border, %function -#endif // void ConvDwFp16Border(float16_t *dst, const float16_t *src, const float16_t *weight, const float16_t *bias, // size_t height, size_t width, size_t in_kh_step, size_t in_kw_step, size_t kernel_w, size_t relu, diff --git a/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Center.S b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Center.S index 359160786e..f8dca404b6 100644 --- a/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Center.S +++ b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Center.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp16Center -#ifndef __APPLE__ -.type ConvDwFp16Center, %function -#endif // void ConvDwFp16Center(float16_t *dst, const float16_t *src, const float16_t *weight, const float16_t *bias, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t in_sw_step, diff --git a/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Row.S b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Row.S index 324f0303ae..355257dd8e 100644 --- a/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Row.S +++ b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Row.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global ConvDwFp16Row -#ifndef __APPLE__ -.type ConvDwFp16Row, %function -#endif // void ConvDwFp16Row(float16_t* output_ptr, const float16_t* input_ptr,const float16_t* filter_ptr, // size_t num_pixels, size_t input_channel, size_t input_step) diff --git a/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Border.S b/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Border.S index a807b5300a..f48401e4cc 100644 --- a/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Border.S +++ b/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Border.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwFp16Border -#ifndef __APPLE__ -.type DeconvDwFp16Border, %function -#endif // void DeconvDwFp16Border(float *dst, const float *src, const float *weight, size_t height, size_t width, // size_t in_kh_step, size_t in_kw_step, size_t kernel_w) diff --git a/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Center.S b/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Center.S index d315ac914f..4348c47833 100644 --- a/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Center.S +++ b/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Center.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global DeconvDwFp16Center -#ifndef __APPLE__ -.type DeconvDwFp16Center, %function -#endif // void DeconvDwFp16Center(float16_t *dst, const float16_t *src, const float16_t *weight, size_t height, size_t width, // size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t in_sw_step, diff --git a/mindspore/lite/nnacl/assembly/fp16/Float16ToFloat32.S b/mindspore/lite/nnacl/assembly/fp16/Float16ToFloat32.S index 650caa89fa..d75b874a40 100644 --- a/mindspore/lite/nnacl/assembly/fp16/Float16ToFloat32.S +++ b/mindspore/lite/nnacl/assembly/fp16/Float16ToFloat32.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global Float16ToFloat32 -#ifndef __APPLE__ -.type Float16ToFloat32, %function -#endif // void Float16ToFloat32(const float16_t *input, float *output, int number); // x0: input, x1: output, x2: number diff --git a/mindspore/lite/nnacl/assembly/fp16/Float32ToFloat16.S b/mindspore/lite/nnacl/assembly/fp16/Float32ToFloat16.S index 7a9c794838..536e28ebaf 100644 --- a/mindspore/lite/nnacl/assembly/fp16/Float32ToFloat16.S +++ b/mindspore/lite/nnacl/assembly/fp16/Float32ToFloat16.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global Float32ToFloat16 -#ifndef __APPLE__ -.type Float32ToFloat16, %function -#endif // void Float32ToFloat16(const float *input, float16_t output, int number); // x0: input, x1: output, x2: number diff --git a/mindspore/lite/nnacl/assembly/fp16/IndirectGemmFp16_16x8.S b/mindspore/lite/nnacl/assembly/fp16/IndirectGemmFp16_16x8.S index c4c2e5e311..5bd1da914e 100644 --- a/mindspore/lite/nnacl/assembly/fp16/IndirectGemmFp16_16x8.S +++ b/mindspore/lite/nnacl/assembly/fp16/IndirectGemmFp16_16x8.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global IndirectGemmFp16_16x8 -#ifndef __APPLE__ -.type IndirectGemmFp16_16x8, %function -#endif // void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weight, float16_t *bias, // size_t step, size_t ic4, size_t oc8, size_t offset, size_t mode, size_t writeC4, size_t relu, size_t relu6); diff --git a/mindspore/lite/nnacl/assembly/fp16/MatVecMulFp16.S b/mindspore/lite/nnacl/assembly/fp16/MatVecMulFp16.S index 5a7adbb76b..e72b4e1ec1 100644 --- a/mindspore/lite/nnacl/assembly/fp16/MatVecMulFp16.S +++ b/mindspore/lite/nnacl/assembly/fp16/MatVecMulFp16.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatVecMulFp16Neon64 -#ifndef __APPLE__ - .type MatVecMulFp16Neon64, %function -#endif + +.text +.align 5 // void MatVecMulFp16Neon64(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, int act_type, int depth, int col) // x0: a diff --git a/mindspore/lite/nnacl/assembly/fp16/MatmulFp16.S b/mindspore/lite/nnacl/assembly/fp16/MatmulFp16.S index dac86acd0e..a1f02286a5 100644 --- a/mindspore/lite/nnacl/assembly/fp16/MatmulFp16.S +++ b/mindspore/lite/nnacl/assembly/fp16/MatmulFp16.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulFp16Neon64 -#ifndef __APPLE__ - .type MatmulFp16Neon64, %function -#endif + +.text +.align 5 // void MatmulFp16Neon64(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, int act_type, // int depth, int row, int col, int stride, bool write_nhwc) diff --git a/mindspore/lite/nnacl/assembly/fp16/MatmulFp16Opt.S b/mindspore/lite/nnacl/assembly/fp16/MatmulFp16Opt.S index 38699e37b8..9662d18e47 100644 --- a/mindspore/lite/nnacl/assembly/fp16/MatmulFp16Opt.S +++ b/mindspore/lite/nnacl/assembly/fp16/MatmulFp16Opt.S @@ -1,11 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulFp16Neon64Opt -#ifndef __APPLE__ - .type MatmulFp16Neon64Opt, %function -#endif + +.text +.align 5 // void MatmulFp16Neon64Opt(const float16_t *a, const float16_t *b, float16_t *c, const float16_t *bias, int act_type, // int depth, int row, int col, size_t stride, size_t writeMode) diff --git a/mindspore/lite/nnacl/assembly/fp16/MatmulWinogradFp16.S b/mindspore/lite/nnacl/assembly/fp16/MatmulWinogradFp16.S index 029365b0a9..c119a942ba 100644 --- a/mindspore/lite/nnacl/assembly/fp16/MatmulWinogradFp16.S +++ b/mindspore/lite/nnacl/assembly/fp16/MatmulWinogradFp16.S @@ -1,12 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global MatrixMultiplyWinogradFp16 -#ifndef __APPLE__ -.type MatrixMultiplyWinogradFp16, %function -#endif // MatrixMultiplyWinogradFp16(float16_t *matix_a, float16_t *matrix_b, float16_t *matrix_c, int m, int k, int n, int in_channel) // x0: matrix_a, x1: matrix_b, x2: matrix_c, x3: m, x4: k, x5: n, x6: in_channel diff --git a/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC4Fp16.S b/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC4Fp16.S index 2bf2f786b4..5815438e35 100644 --- a/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC4Fp16.S +++ b/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC4Fp16.S @@ -1,12 +1,8 @@ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - //.p2align 5,,15 - .global PostFuncBiasReluC4Fp16 -#ifndef __APPLE__ - .type PostFuncBiasReluC4Fp16, %function -#endif +.text +.align 5 //void PostFuncBiasReluC4Fp16(float16_t *dst, const float16_t *src, const float16_t *bias, size_t oc4div, size_t oc4mod, // size_t plane_size, size_t plane_stride, size_t relu_type); @@ -278,3 +274,5 @@ Loop_C1_3_Write: End: ret + +#endif diff --git a/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC8Fp16.S b/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC8Fp16.S index dad91b9332..fe6f5e4011 100644 --- a/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC8Fp16.S +++ b/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC8Fp16.S @@ -1,13 +1,8 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - //.p2align 5,,15 - .global PostFuncBiasReluC8Fp16 -#ifndef __APPLE__ - .type PostFuncBiasReluC8Fp16, %function -#endif +.text +.align 5 //void PostFuncBiasReluC8Fp16(float *dst, const float *src, const float *bias, size_t oc8div,size_t oc8mod // size_t plane_size, size_t stride, int relu_type); diff --git a/mindspore/lite/nnacl/assembly/fp16/TiledC4MatmulFp16.S b/mindspore/lite/nnacl/assembly/fp16/TiledC4MatmulFp16.S index 720ee3e1ac..6e54413706 100644 --- a/mindspore/lite/nnacl/assembly/fp16/TiledC4MatmulFp16.S +++ b/mindspore/lite/nnacl/assembly/fp16/TiledC4MatmulFp16.S @@ -1,11 +1,8 @@ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" .text .align 5 -.global TiledC4MatmulFp16 -#ifndef __APPLE__ -.type TiledC4MatmulFp16, %function -#endif asm_function TiledC4MatmulFp16 @@ -258,3 +255,4 @@ LoopOcEnd: ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [sp], #64 ret +#endif diff --git a/mindspore/lite/nnacl/assembly/fp16/WinogradTransLeftFp16.S b/mindspore/lite/nnacl/assembly/fp16/WinogradTransLeftFp16.S index ccb782881d..5a34daa109 100644 --- a/mindspore/lite/nnacl/assembly/fp16/WinogradTransLeftFp16.S +++ b/mindspore/lite/nnacl/assembly/fp16/WinogradTransLeftFp16.S @@ -1,11 +1,8 @@ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global WinogradTransLeftFp16 -#ifndef __APPLE__ - .type WinogradTransLeftFp16, %function -#endif +.text +.align 5 asm_function WinogradTransLeftFp16 @@ -135,3 +132,5 @@ LoopH: sub sp, sp, #16 ldp x19, x20, [sp], #16 ret + +#endif diff --git a/mindspore/lite/nnacl/assembly/fp16/WinogradTransRightFp16.S b/mindspore/lite/nnacl/assembly/fp16/WinogradTransRightFp16.S index 73c1e517d7..3d2ef04534 100644 --- a/mindspore/lite/nnacl/assembly/fp16/WinogradTransRightFp16.S +++ b/mindspore/lite/nnacl/assembly/fp16/WinogradTransRightFp16.S @@ -1,11 +1,8 @@ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global WinogradTransRightFp16 -#ifndef __APPLE__ - .type WinogradTransRightFp16, %function -#endif +.text +.align 5 asm_function WinogradTransRightFp16 @@ -138,4 +135,6 @@ LoopH: sub sp, sp, #16 ldp x19, x20, [sp], #16 - ret \ No newline at end of file + ret + +#endif diff --git a/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8.S b/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8.S index 5bc1e5095c..8dceae7ac5 100644 --- a/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8.S +++ b/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8.S @@ -1,11 +1,7 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulInt8DpNeon64 -#ifndef __APPLE__ - .type MatmulInt8DpNeon64, %function -#endif +.text +.align 5 //void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, // const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, diff --git a/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8Opt.S b/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8Opt.S index 95f30fe123..c3f473880b 100644 --- a/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8Opt.S +++ b/mindspore/lite/nnacl/assembly/opt/MatmulDpInt8Opt.S @@ -1,11 +1,7 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatmulInt8DpOpt -#ifndef __APPLE__ - .type MatmulInt8DpOpt, %function -#endif +.text +.align 5 //void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep4, const int *a_sums, // const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift, diff --git a/mindspore/lite/nnacl/assembly/opt/MatmulOptR4Int8.S b/mindspore/lite/nnacl/assembly/opt/MatmulOptR4Int8.S index e769ae4185..107df32493 100644 --- a/mindspore/lite/nnacl/assembly/opt/MatmulOptR4Int8.S +++ b/mindspore/lite/nnacl/assembly/opt/MatmulOptR4Int8.S @@ -1,11 +1,7 @@ -#ifdef __aarch64__ +#ifdef ENABLE_ARM64 #include "nnacl/assembly_global.h" - .text - .align 5 - .global MatMulOptR4Int8Neon64 -#ifndef __APPLE__ - .type MatMulOptR4Int8Neon64, %function -#endif +.text +.align 5 //void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, // const int *input_sum, const int *bias) diff --git a/mindspore/lite/nnacl/assembly_global.h b/mindspore/lite/nnacl/assembly_global.h index e7cf56c4a1..8ef79d8d0d 100644 --- a/mindspore/lite/nnacl/assembly_global.h +++ b/mindspore/lite/nnacl/assembly_global.h @@ -16,18 +16,21 @@ #ifndef MINDSPORE_LITE_NNACL_ASSEMBLY_GLOBAL_H #define MINDSPORE_LITE_NNACL_ASSEMBLY_GLOBAL_H +// clang-format off .macro asm_function fname #ifdef __APPLE__ - .globl _\fname; -_\fname : +.globl _\fname +_\fname: #else - .global \fname; -#ifdef __ELE__ -.hidden \fname; -.type \fname, % function; +.global \fname +#ifdef __ELF__ +.hidden \fname +.type \fname, %function #endif -\fname : +\fname: #endif - .endm +.endm + +// clang-format on #endif // MINDSPORE_LITE_NNACL_ASSEMBLY_GLOBAL_H