2 Commits

Author SHA1 Message Date
5c7aee046a optimize decode_bit some more -> 166b 2022-09-18 23:11:26 +02:00
612084a5bf decode_length returns negative value -> 172b 2022-09-18 22:36:31 +02:00

View File

@@ -49,7 +49,7 @@ upkr_unpack:
1: 1:
bl upkr_decode_length bl upkr_decode_length
subs r3, r6, r4 adds r3, r4, #1
beq .Lend beq .Lend
2: 2:
@@ -57,10 +57,11 @@ upkr_unpack:
bl upkr_decode_length bl upkr_decode_length
.Lcopy_loop: .Lcopy_loop:
ldrb r5, [r0, r3] ldrb r5, [r0, r3]
.Lstore:
strb r5, [r0] strb r5, [r0]
adds r0, r0, #1 adds r0, r0, #1
subs r4, r4, #1 adds r4, r4, #1
bne .Lcopy_loop blt .Lcopy_loop
b .Lloop b .Lloop
.Ldata: .Ldata:
@@ -71,29 +72,26 @@ upkr_unpack:
adcs r5, r5, r5 adcs r5, r5, r5
lsrs r4, r5, #8 lsrs r4, r5, #8
beq .Ldata_loop beq .Ldata_loop
b .Lstore
strb r5, [r0]
adds r0, r0, #1
b .Lloop
.Lend: .Lend:
add sp, sp, #FRAME_SIZE add sp, sp, #FRAME_SIZE
pop { r4, r5, r6, r7, pc } pop { r4, r5, r6, r7, pc }
.type upkr_decode_length, %function .type upkr_decode_length, %function
// r0 .. length tmp (saved) // r0 .. -length tmp (saved)
// r1 .. // r1 ..
// r2 .. // r2 ..
// r3 .. // r3 ..
// r4 .. length (returned) // r4 .. -length (returned)
// r5 .. context index (saved) // r5 .. context index (saved)
// r6 .. (saved) // r6 .. (saved)
// r7 .. // r7 ..
upkr_decode_length: upkr_decode_length:
push { r0, r5, r6, lr } push { r0, r5, r6, lr }
movs r4, #1
movs r0, #0 movs r0, #0
subs r4, r0, #1
.Lbit_loop: .Lbit_loop:
adds r5, r5, #1 adds r5, r5, #1
bl upkr_decode_bit bl upkr_decode_bit
@@ -102,58 +100,55 @@ upkr_decode_length:
adds r5, r5, #1 adds r5, r5, #1
bl upkr_decode_bit bl upkr_decode_bit
beq 2f beq 2f
orrs r0, r0, r4 adds r0, r0, r4
2: 2:
lsls r4, r4, #1 lsls r4, r4, #1
b .Lbit_loop b .Lbit_loop
1: 1:
orrs r4, r4, r0 adds r4, r4, r0
pop { r0, r5, r6, pc } pop { r0, r5, r6, pc }
.type upkr_decode_bit, %function .type upkr_decode_bit, %function
// r0 .. tmp / prob (saved) // r0 .. tmp / prob (saved)
// r1 .. out_ptr (modified) // r1 .. in_ptr (modified)
// r2 .. state (modified) // r2 .. state (modified)
// r3 .. scratch (saved) // r3 .. scratch (saved)
// r4 .. // r4 ..
// r5 .. context index (preserved) // r5 .. context index (preserved)
// r6 .. bit (returned) // r6 .. bit (returned)
// r7 .. probs ptr (preserved) // r7 .. probs ptr (preserved)
upkr_decode_bit: upkr_fill_state:
push { r0, r3, lr }
.Lstate_loop:
lsrs r3, r2, #12
bne 1f
lsls r2, r2, #8 lsls r2, r2, #8
ldrb r6, [r1] ldrb r6, [r1]
adds r1, r1, #1 adds r1, r1, #1
orrs r2, r2, r6 orrs r2, r2, r6
b .Lstate_loop
1: upkr_decode_bit:
lsrs r6, r2, #12
beq upkr_fill_state
push { r0, r1, r3, lr }
ldrb r0, [r7, r5] ldrb r0, [r7, r5]
lsrs r3, r2, #8 lsrs r3, r2, #8
uxtb r2, r2 uxtb r1, r2
subs r6, r2, r0 subs r6, r1, r0
blt 1f blt 1f
subs r2, r2, r0 subs r1, r2, r0
rsbs r0, r0, #0 rsbs r0, r0, #0
uxtb r0, r0
1: 1:
muls r3, r3, r0 muls r3, r3, r0
adds r2, r2, r3 adds r2, r1, r3
rsbs r3, r0, #0 rsbs r3, r0, #0
uxtb r3, r3 uxtb r3, r3
adds r3, r3, #8
lsrs r3, r3, #4 lsrs r3, r3, #4
adds r0, r0, r3 adcs r0, r0, r3
cmp r6, #0 cmp r6, #0
blt 1f blt 1f
@@ -164,4 +159,4 @@ upkr_decode_bit:
strb r0, [r7, r5] strb r0, [r7, r5]
lsrs r6, r6, #31 lsrs r6, r6, #31
pop { r0, r3, pc } pop { r0, r1, r3, pc }