more arm32 optimizations, now 228b

This commit is contained in:
2022-10-05 13:54:04 +02:00
parent 3e31b37c1c
commit 39c95598f2

View File

@@ -18,86 +18,80 @@ upkr_unpack:
push { r3-r11, lr } push { r3-r11, lr }
mov r2, #384 mov r2, #384
movs r3, #128 mov r3, #128
.Lclear: .Lclear:
subs r2, r2, #1 subs r2, r2, #1
strb r3, [sp, -r2] strb r3, [sp, -r2]
bne .Lclear bne .Lclear
.Lloop: .Lloop:
movs r5, #0 mov r5, #0
bl upkr_decode_bit bl upkr_decode_bit
bcc .Ldata bcc .Ldata
.Lmatch: .Lmatch:
mov r5, #256 mov r5, #256
cmp r4, #0 rsbs r6, r4, #0
beq 1f blcc upkr_decode_bit
bcc .Lskip_offset
bl upkr_decode_bit
bcc 2f
1:
bl upkr_decode_length bl upkr_decode_length
subs r3, r4, #1 adds r3, r4, #1
popeq { r3-r11, pc } popeq { r3-r11, pc }
2: .Lskip_offset:
mov r5, #256+64 mov r5, #256+64
bl upkr_decode_length bl upkr_decode_length
.Lcopy_loop: .Lcopy_loop:
ldrb r5, [r0, -r3] ldrb r5, [r0, r3]
.Lstore: .Lstore:
strb r5, [r0], #1 strb r5, [r0], #1
subs r4, r4, #1 adds r4, r4, #1
bgt .Lcopy_loop blt .Lcopy_loop
b .Lloop b .Lloop
.Ldata: .Ldata:
movs r5, #1 mov r5, #1
.Ldata_loop: .Ldata_loop:
bl upkr_decode_bit bl upkr_decode_bit
adcs r5, r5, r5 adc r5, r5, r5
rsbs r4, r5, #256 movs r4, r5, lsr #8
bgt .Ldata_loop beq .Ldata_loop
b .Lstore b .Lstore
.type upkr_decode_length, %function .type upkr_decode_length, %function
upkr_decode_length: upkr_decode_length:
mov r12, lr mov r12, lr
movs r6, #0 mov r4, #0
mov r4, #1 mvn r6, #0
.Lbit_loop: .Lbit_loop:
adds r5, r5, #1 bl upkr_decode_bit_inc
bl upkr_decode_bit
addcc r4, r4, r6 addcc r4, r4, r6
movcc pc, r12 movcc pc, r12
adds r5, r5, #1 bl upkr_decode_bit_inc
bl upkr_decode_bit addcs r4, r4, r6
addcs r6, r6, r4 mov r6, r6, lsl #1
lsls r4, r4, #1
b .Lbit_loop b .Lbit_loop
.type upkr_decode_bit, %function .type upkr_decode_bit, %function
upkr_fill_state: upkr_decode_bit_inc:
ldrb r8, [r1], #1 add r5, r5, #1
orr r2, r8, r2, lsl #8
upkr_decode_bit: upkr_decode_bit:
cmp r2, #4096 cmp r2, #4096
blt upkr_fill_state ldrltb r8, [r1], #1
orrlt r2, r8, r2, lsl#8
blt upkr_decode_bit
ldrb r8, [sp, -r5] ldrb r8, [sp, -r5]
and r9, r2, #255 and r9, r2, #255
add r9, r9, #1 add r9, r9, #1
cmp r8, r9 cmp r8, r9
rsbcs r8, r8, #256 rsbcs r8, r8, #256
mov r9, r2, lsr#8 mvn r9, r2, lsr#8
addcc r9, r9, #1 addcs r9, r9, #1
mul r9, r8, r9 mla r2, r8, r9, r2
sub r2, r2, r9
add r9, r8, #8 add r9, r8, #8
sub r8, r8, r9, lsr#4 sub r8, r8, r9, lsr#4
rsbcs r8, r8, #256 rsbcs r8, r8, #256