fix assembly headers and declaration style

pull/14741/head
lixian 4 years ago
parent f1e1d054bf
commit 51208e3411

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global ConvDw3x3Int8BorderPixel
#ifndef __APPLE__
.type ConvDw3x3Int8BorderPixel, %function
#endif
// void ConvDw3x3Int8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t height,
// size_t width, size_t in_kh_step, size_t in_kw_step, size_t channel, size_t in_zp, size_t out_zp,
@ -116,4 +111,3 @@ asm_function ConvDw3x3Int8BorderPixel
vpop {q4-q7}
pop {r4-r8, r9-r12, pc}
#endif
#endif

@ -3,10 +3,6 @@
.text
.align 5
.global ConvDwFp32Border
#ifndef __APPLE__
.type ConvDwFp32Border, %function
#endif
// void ConvDwFp32Border(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width,
// size_t in_kh_step, size_t in_kw_step, size_t kernel_w, size_t relu, size_t relu6)

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global ConvDwFp32Center
#ifndef __APPLE__
.type ConvDwFp32Center, %function
#endif
// void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width,
// size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t in_sw_step,
@ -164,4 +159,3 @@ LoopWEnd:
vpop {q4-q7}
pop {r0-r8, r10, r11, pc}
#endif
#endif

@ -3,10 +3,6 @@
.text
.align 5
.global ConvDwFp32Row
#ifndef __APPLE__
.type ConvDwFp32Row, %function
#endif
// void ConvDwFp32Row(float* output_ptr, const float* input_ptr, const float* filter_ptr,
// size_t num_pixels, size_t input_channel, size_t input_step)

@ -1,13 +1,9 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global ConvDwInt8Center
#ifndef __APPLE__
.type ConvDwInt8Center, %function
#endif
// void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height,
// int width, int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step,
// int in_sw_step, int in_kh_step, int in_kw_step, int8_t *in_zp, int32_t *out_zp,
@ -277,4 +273,3 @@ asm_function ConvDwInt8Center
vpop {q4-q7}
pop {r0-r8, r10, r11, pc}
#endif
#endif

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global ConvDwInt8PostAlign4
#ifndef __APPLE__
.type ConvDwInt8PostAlign4, %function
#endif
// void ConvDwInt8PostAlign4(int8_t *dst, int32_t *buffer, int num_pixels, int32_t output_zp, int32_t out_multiplier,
// int32_t left_shift, int32_t right_shift, int32_t acc_min, int32_t acc_max);
@ -108,4 +103,3 @@ asm_function ConvDwInt8PostAlign4
bx lr
#endif
#endif

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global ConvDwInt8PostAlign4PerChannel
#ifndef __APPLE__
.type ConvDwInt8PostAlign4PerChannel, %function
#endif
// void ConvDwInt8PostAlign4PerChannel(int8_t *dst, int32_t *buffer, int channel4, int32_t output_zp, int32_t *out_multiplier,
// int32_t *left_shift, int32_t *right_shift, int32_t acc_min, int32_t acc_max);
@ -111,4 +106,3 @@ asm_function ConvDwInt8PostAlign4PerChannel
bx lr
#endif
#endif

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global ConvDwInt8Row
#ifndef __APPLE__
.type ConvDwInt8Row, %function
#endif
// void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *weight_ptr, int num_pixels,
// int output_channel, int input_step, int8_t input_zp)
@ -132,4 +127,3 @@ asm_function ConvDwInt8Row
vpop {q4-q7}
pop {r4-r8, r9-r12, pc}
#endif
#endif

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global DeconvDwFp32Center
#ifndef __APPLE__
.type DeconvDwFp32Center, %function
#endif
// void DeconvDwFp32Center(float *dst, const float *src, const float *weight, size_t height, size_t width,
// size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
@ -67,4 +62,3 @@ asm_function DeconvDwFp32Center
pop {r0-r8, r10, r11, pc}
#endif
#endif

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global DeconvDwInt8Center
#ifndef __APPLE__
.type DeconvDwInt8Center, %function
#endif
// void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, size_t height, size_t width,
// size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
@ -67,4 +62,3 @@ asm_function DeconvDwInt8Center
pop {r0-r8, r10, r11, pc}
#endif
#endif

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global DeconvDwInt8Post
#ifndef __APPLE__
.type DeconvDwInt8Post, %function
#endif
// void DeconvDwInt8Post(int8_t *dst, int32_t *output_buffer, const int32_t *bias, int block_channel, int pixel_nums,
// int out_multiplier, int left_shift, int right_shift, int32_t out_zp, int32_t acc_min,
@ -72,4 +67,3 @@ asm_function DeconvDwInt8Post
bx lr
#endif
#endif

@ -3,10 +3,6 @@
.text
.align 5
.global IndirectGemmInt16to32_8x4
#ifndef __APPLE__
.type IndirectGemmInt16to32_8x4, %function
#endif
// void IndirectGemmInt16to32_8x4(int *output, short *input, short *weight, size_t ksize, size_t ic8, size_t oc4, size_t offset);
// r0: output, r1: input, r2: weight, r3: ksize, r4: ic8, r5: oc4, r6: offset

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global IndirectGemmInt8_2x4
#ifndef __APPLE__
.type IndirectGemmInt8_2x4, %function
#endif
// void IndirectGemmInt8_2x4(int8_t *output, int8_t *input, int8_t *weight, int32_t *bias, size_t ksize, size_t ic4,
// size_t oc, size_t offset, int32_t *input_sum, size_t act_min, size_t act_max, size_t out_zp, int32_t *out_multiplier,
@ -294,4 +289,3 @@ LoopOcEnd:
vpop {q4-q7}
pop {r4-r8, r10, r11, pc}
#endif
#endif

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global MatVecMulFp32
#ifndef __APPLE__
.type MatVecMulFp32, %function
#endif
// void MatVecMulFp32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int col)
// r0: a
@ -183,4 +178,3 @@ End:
sub sp, sp, #52
pop {r0-r8, r9, r10, r11, pc}
#endif
#endif

@ -1,11 +1,8 @@
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global MatmulFloatNeon32
#ifndef __APPLE__
.type MatmulFloatNeon32, %function
#endif
.text
.align 5
// void MatmulFloatNeon32(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
// int row, int col, size_t stride, size_t writeNhwc, size_t WriteWino)

@ -1,11 +1,8 @@
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global MatmulFloatNeon32Opt
#ifndef __APPLE__
.type MatmulFloatNeon32Opt, %function
#endif
.text
.align 5
// void MatmulFloatNeon32Opt(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
// int row, int col, size_t stride, size_t writeMode)

@ -1,11 +1,8 @@
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global MatmulFloatNeon32Opt12x4
#ifndef __APPLE__
.type MatmulFloatNeon32Opt12x4, %function
#endif
.text
.align 5
// void MatmulFloatNeon32Opt12x4(const float *a, const float *b, float *c, const float *bias, int act_type, int depth,
// int row, int col, size_t stride, size_t writeMode)

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global MatmulInt8Neon32
#ifndef __APPLE__
.type MatmulInt8Neon32, %function
#endif
//void MatmulInt8Neon32(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16,
// const int *input_sums, const int *weight_bias, int act_min, int act_max, int out_zp,
@ -286,4 +281,3 @@ End1:
vpop {q4-q7}
pop {r0-r11, pc}
#endif
#endif

@ -1,13 +1,8 @@
#ifdef __arm__
#ifndef __aarch64__
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global MatmulInt8Opt
#ifndef __APPLE__
.type MatmulInt8Opt, %function
#endif
//void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep16,
// const int *input_sums, const int *weight_bias, int act_min, int act_max, int out_zp,
@ -288,4 +283,3 @@ LoopRowEnd:
vpop {q4-q7}
pop {r0-r8, r10, r11, pc}
#endif
#endif

@ -3,10 +3,6 @@
.text
.align 5
.global MatrixMultiplyWinograd
#ifndef __APPLE__
.type MatrixMultiplyWinograd, %function
#endif
// MatrixMultiplyWinograd(float *matrix_a, float *matrix_b, float *matrix_c, int m, int k, int n, int in_channel, int c4_channel)
// r0: matrix_a, r1: matrix_b, r2: matrix_c, r3: m, r4: k, r5: n, r6: in_channel, r7: c4_channel * 4

@ -1,12 +1,8 @@
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
//.p2align 5,,15
.global PostFuncBiasReluC4
#ifndef __APPLE__
.type PostFuncBiasReluC4, %function
#endif
asm_function PostFuncBiasReluC4
push {r4-r8, r10, r11, lr}
@ -234,3 +230,4 @@ Loop_C1_3_Write:
End:
sub sp, sp, #32
pop {r4-r8, r10, r11, pc}
#endif

@ -3,11 +3,6 @@
.text
.align 5
//.p2align 5,,15
.global PostFuncBiasReluC8
#ifndef __APPLE__
.type PostFuncBiasReluC8, %function
#endif
//void PostFuncBiasReluC8(float *dst, const float *src, const float *bias, size_t oc8div, size_t oc8mod,
// size_t plane_size, size_t stride, int relu_type);

@ -1,12 +1,8 @@
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global PreSum4x16Int8Peroc
#ifndef __APPLE__
.type PreSum4x16Int8Peroc, %function
#endif
//void PreSum4x16Int8Peroc(const int8_t *src, int32_t *sum, int32_t *zp, size_t hw4, size_t ic16, int32_t oc_div2,
// size_t oc_res2, size_t stride);
@ -129,3 +125,4 @@ End:
sub sp, sp, #100
vpop {q4-q7}
pop {r4-r11, pc}
#endif

@ -1,12 +1,8 @@
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global PreSum4x16Int8Pert
#ifndef __APPLE__
.type PreSum4x16Int8Pert, %function
#endif
// void PreSum4x16Int8Pert(const int8_t *src, int32_t *sum, size_t row4, size_t col16, int32_t filter_zp);
@ -80,3 +76,4 @@ End:
sub sp, sp, #96
vpop {q4-q7}
pop {r4-r8, r10, r11, pc}
#endif

@ -1,11 +1,8 @@
#ifdef ENABLE_ARM32
#include "nnacl/assembly_global.h"
.text
.align 5
.global TiledC4MatmulFp32
#ifndef __APPLE__
.type TiledC4MatmulFp32, %function
#endif
.text
.align 5
asm_function TiledC4MatmulFp32
//void TiledC4MatmulFp32(float* dst, const float* src, const float* weight, size_t cal_num, size_t ic4, size_t oc4)

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save