From 30aea1cc765eb64181e3c0c4bf3b1c7b3ff1d8a7 Mon Sep 17 00:00:00 2001 From: lixian <179220644@qq.com> Date: Fri, 7 Aug 2020 17:00:15 +0800 Subject: [PATCH] fix depth wise convolution compile bugs --- .../arm/opclib/assembly/arm32/ConvDwFp32Center.S | 8 ++++---- .../arm/opclib/assembly/arm32/ConvDwInt8Center.S | 11 +++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/assembly/arm32/ConvDwFp32Center.S b/mindspore/lite/src/runtime/kernel/arm/opclib/assembly/arm32/ConvDwFp32Center.S index 7f1724e656..c8398ca03d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/assembly/arm32/ConvDwFp32Center.S +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/assembly/arm32/ConvDwFp32Center.S @@ -21,7 +21,7 @@ ConvDwFp32Center: // clang's rule seems more simple, though there are no subroutine calls here // r4-r8 and q4-q7 must be saved according to https://static.docs.arm.com/ihi0042/i/aapcs32.pdf push {r0-r8, r10, r11, lr} - vpush {v4-v7} + vpush {q4-q7} add sp, sp, #112 ldr r4, [sp, #48] @@ -38,7 +38,7 @@ ConvDwFp32Center: cmp r5, #4 blt LoopW LoopW4: - mov r11, [sp, #76] // in_sw_step + ldr r11, [sp, #76] // in_sw_step mov r8, r1 // src_kh ldr r2, [sp, #8] // weight_kh ldr r6, [sp, #56] // kernel_h @@ -100,7 +100,7 @@ ConvDwFp32Center: mul r11, r11, r12 add r1, r1, r11 sub r5, r5, #4 - cmp r5, r5, #0 + cmp r5, #0 ble LoopWEnd cmp r5, #4 bge LoopW @@ -155,7 +155,7 @@ ConvDwFp32Center: bne LoopH LoopWEnd: sub sp, sp, #112 - vpop {v4-v7} + vpop {q4-q7} pop {r0-r8, r10, r11, pc} #endif #endif diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/assembly/arm32/ConvDwInt8Center.S b/mindspore/lite/src/runtime/kernel/arm/opclib/assembly/arm32/ConvDwInt8Center.S index b97dc8a9ee..2f75feaa19 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/assembly/arm32/ConvDwInt8Center.S +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/assembly/arm32/ConvDwInt8Center.S @@ -52,7 +52,7 @@ ConvDwInt8Center: ldr r5, [sp, #52] // width ldr r0, [sp] // dst_w LoopW4: - mov r11, [sp, #76] // in_sw_step + ldr r11, [sp, #76] // in_sw_step mov r8, r1 // src_kh ldr r2, [sp, #8] // weight_kh ldr r6, [sp, #56] // kernel_h @@ -145,8 +145,11 @@ ConvDwInt8Center: mov r12, #4 mul r11, r11, r12 add r1, r1, r11 - subs r5, r5, #1 - bne LoopW4 + sub r5, r5, #4 + cmp r5, #0 + ble LoopWEnd + cmp r5, #4 + bge LoopW4 LoopW: mov r8, r1 // src_kh ldr r2, [sp, #8] // weight_kh @@ -199,7 +202,7 @@ ConvDwInt8Center: str r12, [sp, #4] subs r4, r4, #1 bne LoopH - +LoopWEnd: sub sp, sp, #112 vpop {q4-q7} pop {r0-r8, r10, r11, pc}