From 39c95598f254f33d68d0f32bd40d49881a0ec94d Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Wed, 5 Oct 2022 13:54:04 +0200 Subject: [PATCH] more arm32 optimizations, now 228b --- asm_unpackers/unpack_arm32.S | 62 ++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/asm_unpackers/unpack_arm32.S b/asm_unpackers/unpack_arm32.S index 99b880b..81cfff9 100644 --- a/asm_unpackers/unpack_arm32.S +++ b/asm_unpackers/unpack_arm32.S @@ -18,86 +18,80 @@ upkr_unpack: push { r3-r11, lr } mov r2, #384 - movs r3, #128 + mov r3, #128 .Lclear: subs r2, r2, #1 strb r3, [sp, -r2] bne .Lclear .Lloop: - movs r5, #0 + mov r5, #0 bl upkr_decode_bit bcc .Ldata .Lmatch: mov r5, #256 - cmp r4, #0 - beq 1f + rsbs r6, r4, #0 + blcc upkr_decode_bit + bcc .Lskip_offset - bl upkr_decode_bit - bcc 2f - -1: bl upkr_decode_length - subs r3, r4, #1 + adds r3, r4, #1 popeq { r3-r11, pc } -2: +.Lskip_offset: mov r5, #256+64 bl upkr_decode_length .Lcopy_loop: - ldrb r5, [r0, -r3] + ldrb r5, [r0, r3] .Lstore: strb r5, [r0], #1 - subs r4, r4, #1 - bgt .Lcopy_loop + adds r4, r4, #1 + blt .Lcopy_loop b .Lloop .Ldata: - movs r5, #1 + mov r5, #1 .Ldata_loop: bl upkr_decode_bit - adcs r5, r5, r5 - rsbs r4, r5, #256 - bgt .Ldata_loop + adc r5, r5, r5 + movs r4, r5, lsr #8 + beq .Ldata_loop b .Lstore .type upkr_decode_length, %function upkr_decode_length: mov r12, lr - movs r6, #0 - mov r4, #1 + mov r4, #0 + mvn r6, #0 .Lbit_loop: - adds r5, r5, #1 - bl upkr_decode_bit + bl upkr_decode_bit_inc addcc r4, r4, r6 movcc pc, r12 - adds r5, r5, #1 - bl upkr_decode_bit - addcs r6, r6, r4 - lsls r4, r4, #1 + bl upkr_decode_bit_inc + addcs r4, r4, r6 + mov r6, r6, lsl #1 b .Lbit_loop .type upkr_decode_bit, %function -upkr_fill_state: - ldrb r8, [r1], #1 - orr r2, r8, r2, lsl #8 - +upkr_decode_bit_inc: + add r5, r5, #1 upkr_decode_bit: cmp r2, #4096 - blt upkr_fill_state + ldrltb r8, [r1], #1 + orrlt r2, r8, r2, lsl#8 + blt upkr_decode_bit ldrb r8, [sp, -r5] and r9, r2, #255 add r9, r9, #1 cmp r8, r9 rsbcs r8, r8, #256 - mov r9, r2, lsr#8 - addcc r9, r9, #1 - mul r9, r8, r9 - sub r2, r2, r9 + mvn r9, r2, lsr#8 + addcs r9, r9, #1 + mla r2, r8, r9, r2 add r9, r8, #8 sub r8, r8, r9, lsr#4 rsbcs r8, r8, #256