2 Commits

Author SHA1 Message Date
5c7aee046a optimize decode_bit some more -> 166b 2022-09-18 23:11:26 +02:00
612084a5bf decode_length returns negative value -> 172b 2022-09-18 22:36:31 +02:00

View File

@@ -49,7 +49,7 @@ upkr_unpack:
1:
bl upkr_decode_length
subs r3, r6, r4
adds r3, r4, #1
beq .Lend
2:
@@ -57,10 +57,11 @@ upkr_unpack:
bl upkr_decode_length
.Lcopy_loop:
ldrb r5, [r0, r3]
.Lstore:
strb r5, [r0]
adds r0, r0, #1
subs r4, r4, #1
bne .Lcopy_loop
adds r4, r4, #1
blt .Lcopy_loop
b .Lloop
.Ldata:
@@ -71,29 +72,26 @@ upkr_unpack:
adcs r5, r5, r5
lsrs r4, r5, #8
beq .Ldata_loop
strb r5, [r0]
adds r0, r0, #1
b .Lloop
b .Lstore
.Lend:
add sp, sp, #FRAME_SIZE
pop { r4, r5, r6, r7, pc }
.type upkr_decode_length, %function
// r0 .. length tmp (saved)
// r0 .. -length tmp (saved)
// r1 ..
// r2 ..
// r3 ..
// r4 .. length (returned)
// r4 .. -length (returned)
// r5 .. context index (saved)
// r6 .. (saved)
// r7 ..
upkr_decode_length:
push { r0, r5, r6, lr }
movs r4, #1
movs r0, #0
subs r4, r0, #1
.Lbit_loop:
adds r5, r5, #1
bl upkr_decode_bit
@@ -102,58 +100,55 @@ upkr_decode_length:
adds r5, r5, #1
bl upkr_decode_bit
beq 2f
orrs r0, r0, r4
adds r0, r0, r4
2:
lsls r4, r4, #1
b .Lbit_loop
1:
orrs r4, r4, r0
adds r4, r4, r0
pop { r0, r5, r6, pc }
.type upkr_decode_bit, %function
// r0 .. tmp / prob (saved)
// r1 .. out_ptr (modified)
// r1 .. in_ptr (modified)
// r2 .. state (modified)
// r3 .. scratch (saved)
// r4 ..
// r5 .. context index (preserved)
// r6 .. bit (returned)
// r7 .. probs ptr (preserved)
upkr_decode_bit:
push { r0, r3, lr }
.Lstate_loop:
lsrs r3, r2, #12
bne 1f
upkr_fill_state:
lsls r2, r2, #8
ldrb r6, [r1]
adds r1, r1, #1
orrs r2, r2, r6
b .Lstate_loop
1:
upkr_decode_bit:
lsrs r6, r2, #12
beq upkr_fill_state
push { r0, r1, r3, lr }
ldrb r0, [r7, r5]
lsrs r3, r2, #8
uxtb r2, r2
uxtb r1, r2
subs r6, r2, r0
subs r6, r1, r0
blt 1f
subs r2, r2, r0
subs r1, r2, r0
rsbs r0, r0, #0
uxtb r0, r0
1:
muls r3, r3, r0
adds r2, r2, r3
adds r2, r1, r3
rsbs r3, r0, #0
uxtb r3, r3
adds r3, r3, #8
lsrs r3, r3, #4
adds r0, r0, r3
adcs r0, r0, r3
cmp r6, #0
blt 1f
@@ -164,4 +159,4 @@ upkr_decode_bit:
strb r0, [r7, r5]
lsrs r6, r6, #31
pop { r0, r3, pc }
pop { r0, r1, r3, pc }