; Comparison of register allocator between Cemu 1.11.0 and 1.11.1
;
; Old register allocator:
;   Not global, looks at blocks of uninterrupted instructions individually
;   (uninterrupted means no branches to or from the instructions)
; New register allocator:
;   Global, looks at the whole function. Can carry registers across branches and loops
;
; This file is a listing for comparison, not an assemblable source: the same
; PowerPC function is shown first, followed by the x86-64 code (Intel syntax)
; that each Cemu version generated for it.
;
; Legend for the x86-64 listings (derived from the annotations and operand
; patterns in this file; NOTE(review): confirm against the recompiler source):
;   [rsp+4+4*N]  PPC GPR rN  (r0=+4, r1=+8, r3=+10h, r8=+24h, r9=+28h,
;                             r10=+2Ch, r11=+30h, r12=+34h)
;   [rsp+28Eh]   PPC CR0 equal-bit
;   [rsp+2BCh]   PPC LR
;   [rsp+2B4h]   annotated below as "ppc CTR". The amount subtracted at the top
;                of each block (7, 1, 4, 2, 8, 5) equals the number of PPC
;                instructions in that block, so this is presumably a
;                remaining-instruction/cycle counter rather than the
;                architectural CTR register - confirm.
;   r13          added to every guest address; presumably the host base of
;                guest memory - confirm.
;   r15          base of the table used by the indirect jumps for bl/blr.
;                Note 23F4E5A8h = 20000000h + 2*1FA72D4h, i.e. the bl jump uses
;                the same [r15+addr*2+20000000h] form as blr with the address
;                folded into the displacement.
;   movbe        load/store with byte swap; used for the 32-bit guest accesses.

; PPC function
;
; Pseudo-code of what the instructions below do:
;   if (byte_1043EB90 == 0) sub_2FA72D4();
;   r11 = -1;
;   for (r0 = *r3; r0 != 0; r0 = *++r3)
;       r11 = (r11 >> 8) ^ dword_10512FA8[(r11 ^ r0) & 0xFF];
;   return ~r11;                                ; result in r3
; NOTE(review): the shape (start value -1, byte-indexed table of 32-bit words,
; final NOT) resembles a table-driven CRC-32 over a zero-terminated byte
; string; the table contents are not visible here - confirm.
    mflr r0
    stw r0, 4(r1)                               ; save LR at 4(r1)
    stwu r1, -8(r1)                             ; store old r1 at -8(r1) and set r1 = r1 - 8
    lis r12, ((byte_1043EB90+0x10000)@h)
    lbz r0, byte_1043EB90@l(r12)                ; r0 = byte_1043EB90
    cmpwi r0, 0
    bne loc_2FA73C4                             ; skip the call when the byte is non-zero
    bl sub_2FA72D4
loc_2FA73C4:
    lbz r0, 0(r3)                               ; r0 = first byte at r3
    cmpwi r0, 0
    li r11, -1                                  ; r11 = running value, starts at 0xFFFFFFFF
    beq loc_2FA73FC                             ; first byte is zero: skip the loop
    lis r12, dword_10512FA8@h
    addi r12, r12, dword_10512FA8@l             ; r12 = address of dword_10512FA8
loc_2FA73DC:
    xor r9, r11, r0
    lbzu r0, 1(r3)                              ; r3 += 1, r0 = next byte
    clrlslwi r10, r9, 24,2                      ; r10 = (r9 & 0xFF) << 2, byte offset into the table
    srwi r8, r11, 8
    lwzx r11, r10, r12                          ; r11 = 32-bit word at r12 + r10
    cmpwi r0, 0
    xor r11, r8, r11
    bne loc_2FA73DC                             ; loop until the byte just loaded is zero
loc_2FA73FC:
    not r3, r11                                 ; r3 = ~r11
    lwz r0, 0xC(r1)                             ; reload the LR saved at entry (4 + 8 frame bytes)
    mtlr r0
    addi r1, r1, 8                              ; release the 8-byte frame
    blr
____________________________________________________
; Code generated by Cemu 1.11.0 (partially annotated)
;
; Each block between branches reloads the PPC registers it needs from the
; [rsp+..] slots and writes the ones it changed back before the branch.
; In particular loc_loop does 4 slot loads and 6 slot stores on every
; iteration.
; NOTE(review): the guest addresses used below are 1FA73C4h / 1FA72D4h while
; the PPC listing labels are loc_2FA73C4 / sub_2FA72D4 (they differ by
; 1000000h); presumably the disassembly was loaded at a different base - confirm.
    mov eax,dword ptr [rsp+2BCh] ; Move PPC LR into eax
    mov ebx,dword ptr [rsp+8] ; Move PPC R1 into ebx
    sub dword ptr [rsp+2B4h],7 ; decrement ppc CTR
    ; mflr r0 / stw r0, 4(r1)
    mov edx,eax
    mov r14,rdx
    movbe dword ptr [r13+rbx+4],r14d
    ; stwu r1, -8(r1)
    mov r14,rbx
    movbe dword ptr [r13+rbx-8],r14d
    add ebx,0FFFFFFF8h
    ; lis r12 / lbz r0 (10440000h - 1470h = 1043EB90h)
    mov ebp,10440000h
    movzx edx,byte ptr [r13+rbp-1470h]
    ; cmpwi r0, 0
    cmp edx,0
    sete byte ptr [rsp+28Eh]
    ; write r0, r1, r12 back to their slots before leaving the block
    mov dword ptr [rsp+4],edx
    mov dword ptr [rsp+8],ebx
    mov dword ptr [rsp+34h],ebp
    ; bne loc_2FA73C4 (mov does not modify rflags, so the cmp result is still valid)
    jne 000000001DE4596E
    sub dword ptr [rsp+2B4h],1
    mov dword ptr [rsp+2BCh],1FA73C4h ; update LR for function call
    mov edx,1FA72D4h
    jmp qword ptr [r15+23F4E5A8h] ; call function (BL)
loc_000000001DE4596E:
    mov esi,dword ptr [rsp+10h]
    sub dword ptr [rsp+2B4h],4
    ; lbz r0, 0(r3) / cmpwi r0, 0
    movzx edx,byte ptr [r13+rsi]
    cmp edx,0
    sete byte ptr [rsp+28Eh]
    ; li r11, -1
    mov edi,0FFFFFFFFh
    mov dword ptr [rsp+4],edx
    mov dword ptr [rsp+30h],edi
    ; beq loc_2FA73FC
    je after_loop
    sub dword ptr [rsp+2B4h],2
    ; lis r12 / addi r12 (10510000h + 2FA8h = 10512FA8h)
    mov ebp,10510000h
    add ebp,2FA8h
    mov dword ptr [rsp+34h],ebp
loc_loop:
    mov edx,dword ptr [rsp+4] ; load ppc registers
    mov ebp,dword ptr [rsp+34h]
    mov esi,dword ptr [rsp+10h]
    mov edi,dword ptr [rsp+30h]
    sub dword ptr [rsp+2B4h],8
    mov r8d,edi ; xor r9, r11, r0
    xor r8d,edx
    add esi,1 ; lbzu r0, 1(r3)
    movzx edx,byte ptr [r13+rsi]
    mov r9d,r8d ; clrlslwi r10, r9, 24,2
    rol r9d,2
    and r9d,3FCh
    mov r10,rdi ; srwi r8, r11, 8
    shr r10d,8
    add r9d,ebp ; lwzx r11, r10, r12
    movbe edi,dword ptr [r13+r9]
    ; undo the add so r9d again holds the PPC r10 value that is stored below
    sub r9d,ebp
    xor edi,r10d ; xor r11, r8, r11 (reordered to avoid modifying rflags set by CMP)
    cmp edx,0 ; cmpwi r0, 0
    sete byte ptr [rsp+28Eh] ; update PPC CR0 equal-bit
    mov dword ptr [rsp+4],edx ; store modified ppc registers
    mov dword ptr [rsp+10h],esi
    mov dword ptr [rsp+30h],edi
    mov dword ptr [rsp+28h],r8d
    mov dword ptr [rsp+2Ch],r9d
    mov dword ptr [rsp+24h],r10d
    jne loc_loop ; bne
after_loop:
    mov ebx,dword ptr [rsp+8]
    mov edi,dword ptr [rsp+30h]
    sub dword ptr [rsp+2B4h],5
    ; not r3, r11
    mov esi,edi
    not esi
    ; lwz r0, 0xC(r1) / mtlr r0
    movbe edx,dword ptr [r13+rbx+0Ch]
    mov eax,edx
    ; addi r1, r1, 8
    add ebx,8
    mov dword ptr [rsp+2BCh],eax
    mov dword ptr [rsp+4],edx
    mov dword ptr [rsp+8],ebx
    mov dword ptr [rsp+10h],esi
    mov edx,dword ptr [rsp+2BCh]
    jmp qword ptr [r15+rdx*2+20000000h] ; blr
____________________________________________________
; Code generated by Cemu 1.11.1
;
; Same function with the global allocator. Host registers keep their PPC
; values across the branches (r8d = r1, r9d = r0, esi = r3, ebp = r11,
; edi = r12 inside the loop), so loc_loop no longer loads or stores the GPR
; slots; the write-back happens once after the loop exits. The counter at
; [rsp+2B4h] and the CR0 equal-bit at [rsp+28Eh] are still updated in memory
; on every iteration.
    sub dword ptr [rsp+2B4h],7
    ; mflr r0 / stw r0, 4(r1)
    mov r10d,dword ptr [rsp+2BCh]
    mov r9d,r10d
    mov r8d,dword ptr [rsp+8]
    movbe dword ptr [r13+r8+4],r9d
    ; stwu r1, -8(r1)
    mov r14,r8
    movbe dword ptr [r13+r8-8],r14d
    add r8d,0FFFFFFF8h
    ; lis r12 / lbz r0 / cmpwi r0, 0
    mov edi,10440000h
    movzx r9d,byte ptr [r13+rdi-1470h]
    cmp r9d,0
    sete byte ptr [rsp+28Eh]
    mov dword ptr [rsp+4],r9d
    mov dword ptr [rsp+34h],edi
    mov dword ptr [rsp+8],r8d
    ; bne loc_2FA73C4
    jne 000000001D7BFD74
    sub dword ptr [rsp+2B4h],1
    mov dword ptr [rsp+2BCh],1FA73C4h ; update LR for function call
    mov edx,1FA72D4h
    jmp qword ptr [r15+23F4E5A8h] ; call function (BL)
    ; NOTE(review): the four reloads below follow an unconditional jmp and sit
    ; before the jne target label, so nothing in this listing falls through or
    ; branches to them - confirm how (or whether) they are reached.
    mov edi,dword ptr [rsp+34h]
    mov r8d,dword ptr [rsp+8]
    mov r9d,dword ptr [rsp+4]
    mov r10d,dword ptr [rsp+2BCh]
loc_000000001D7BFD74:
    sub dword ptr [rsp+2B4h],4
    ; lbz r0, 0(r3) / cmpwi r0, 0
    mov esi,dword ptr [rsp+10h]
    movzx r9d,byte ptr [r13+rsi]
    cmp r9d,0
    sete byte ptr [rsp+28Eh]
    ; li r11, -1
    mov ebp,0FFFFFFFFh
    mov dword ptr [rsp+30h],ebp
    mov dword ptr [rsp+4],r9d
    ; beq loc_2FA73FC
    je after_loop
    sub dword ptr [rsp+2B4h],2
    ; lis r12 / addi r12 (10510000h + 2FA8h = 10512FA8h)
    mov edi,10510000h
    add edi,2FA8h
    ; The three values loaded here (r8, r10, r9 slots) are overwritten by the
    ; first loop iteration before being read.
    mov eax,dword ptr [rsp+24h] ; ppc register loads moved out of hot path
    mov edx,dword ptr [rsp+2Ch]
    mov ebx,dword ptr [rsp+28h]
loc_loop:
    sub dword ptr [rsp+2B4h],8
    mov ebx,ebp ; xor r9, r11, r0
    xor ebx,r9d
    add esi,1 ; lbzu r0, 1(r3)
    movzx r9d,byte ptr [r13+rsi]
    mov edx,ebx ; clrlslwi r10, r9, 24,2
    rol edx,2
    and edx,3FCh
    mov rax,rbp ; srwi r8, r11, 8
    shr eax,8
    add edx,edi ; lwzx r11, r10, r12
    movbe ebp,dword ptr [r13+rdx]
    ; undo the add so edx again holds the PPC r10 value
    sub edx,edi
    xor ebp,eax ; xor r11, r8, r11 (reordered)
    cmp r9d,0 ; cmpwi r0, 0
    sete byte ptr [rsp+28Eh] ; update PPC CR0 equal-bit
    jne loc_loop ; bne
    mov dword ptr [rsp+24h],eax ; ppc register stores moved out of hot path
    mov dword ptr [rsp+2Ch],edx
    mov dword ptr [rsp+28h],ebx
    mov dword ptr [rsp+30h],ebp
    mov dword ptr [rsp+4],r9d
    mov dword ptr [rsp+34h],edi
after_loop:
    sub dword ptr [rsp+2B4h],5
    ; not r3, r11 (r11 is re-read from its slot because this label is also
    ; reached directly from the je above)
    mov eax,dword ptr [rsp+30h]
    mov esi,eax
    not esi
    mov dword ptr [rsp+10h],esi
    ; lwz r0, 0xC(r1) / mtlr r0 (r8d still holds r1 from the function entry)
    movbe eax,dword ptr [r13+r8+0Ch]
    mov r10d,eax
    mov dword ptr [rsp+2BCh],r10d
    mov dword ptr [rsp+4],eax
    ; addi r1, r1, 8
    add r8d,8
    mov dword ptr [rsp+8],r8d
    mov edx,dword ptr [rsp+2BCh]
    jmp qword ptr [r15+rdx*2+20000000h] ; blr