|
|
@ -22,7 +22,8 @@
|
|
|
|
MatmulFloatNeon32Opt12x4:
|
|
|
|
MatmulFloatNeon32Opt12x4:
|
|
|
|
// r4-r8 and q4-q7 must be saved according to https://static.docs.arm.com/ihi0042/i/aapcs32.pdf
|
|
|
|
// r4-r8 and q4-q7 must be saved according to https://static.docs.arm.com/ihi0042/i/aapcs32.pdf
|
|
|
|
push {r0-r8, r10, r11, lr}
|
|
|
|
push {r0-r8, r10, r11, lr}
|
|
|
|
add sp, sp, #48
|
|
|
|
vpush {q4-q7}
|
|
|
|
|
|
|
|
add sp, sp, #112
|
|
|
|
|
|
|
|
|
|
|
|
ldr r5, [sp, #4]
|
|
|
|
ldr r5, [sp, #4]
|
|
|
|
ldr r6, [sp, #8]
|
|
|
|
ldr r6, [sp, #8]
|
|
|
@ -400,6 +401,7 @@ LoopRow:
|
|
|
|
b LoopRow
|
|
|
|
b LoopRow
|
|
|
|
|
|
|
|
|
|
|
|
LoopRowEnd:
|
|
|
|
LoopRowEnd:
|
|
|
|
sub sp, sp, #48
|
|
|
|
sub sp, sp, #112
|
|
|
|
|
|
|
|
vpop {q4-q7}
|
|
|
|
pop {r0-r8, r10, r11, pc}
|
|
|
|
pop {r0-r8, r10, r11, pc}
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|