rizinorg / rz-ghidra

Deep ghidra decompiler and sleigh disassembler integration for rizin
GNU Lesser General Public License v3.0
829 stars 88 forks source link

Decompiler not using the same stack variables as rizin #359

Open wannkunstbeikor opened 2 months ago

wannkunstbeikor commented 2 months ago

For this small Windows crackme, the Ghidra decompiler output does not use the correct stack variables. Below is the `pdf` output:

            ; CALL XREF from sym.___mingw_CRTStartup @ 0x4010f8
            ;-- _main:
┌ int main(int argc, char **argv, char **envp);
│           ; var const char *s2 @ stack - 0xe4
│           ; var char [100] s1 @ stack - 0xd4
│           ; var char [100] var_70h @ stack - 0x70
│           ; var int var_ch @ stack - 0xc
│           ; var int32_t var_8h @ stack - 0x8
│           0x00401350      push  ebp
│           0x00401351      mov   ebp, esp
│           0x00401353      push  edi
│           0x00401354      and   esp, 0xfffffff0
│           0x00401357      sub   esp, 0xe0
│           0x0040135d      call  sym.___main                          ; sym.___main
│           0x00401362      mov   dword [esp], str.Input_the_password: ; [0x403024:4]=0x75706e49 ; "Input the password: " ; const char *format
│           0x00401369      call  sym._printf                          ; sym._printf ; int printf(const char *format)
│           0x0040136e      lea   eax, [var_70h[0]]
│           0x00401372      mov   dword [s2], eax
│           0x00401376      mov   dword [esp], data.00403039           ; [0x403039:4]=0x4f007325 ; "%s" ; const char *format
│           0x0040137d      call  sym._scanf                           ; sym._scanf ; int scanf(const char *format)
│           0x00401382      mov   dword [s1[0]], 0x73373030            ; '007s'
│                                                                      ; [0x73373030:4]=-1
│           0x0040138a      mov   dword [s1[4]], 0x72657075            ; 'uper'
│                                                                      ; [0x72657075:4]=-1
│           0x00401392      mov   dword [s1[8]], 0x797073              ; 'spy'
│                                                                      ; [0x797073:4]=-1
│           0x0040139a      lea   edx, [s1[12]]
│           0x0040139e      mov   eax, 0
│           0x004013a3      mov   ecx, 0x16                            ; 22
│           0x004013a8      mov   edi, edx
│           0x004013aa      rep   stosd dword es:[edi], eax
│           0x004013ac      lea   eax, [var_70h[0]]
│           0x004013b0      mov   dword [s2], eax                      ; const char *s2
│           0x004013b4      lea   eax, [s1[0]]
│           0x004013b8      mov   dword [esp], eax                     ; const char *s1
│           0x004013bb      call  sym._strcmp                          ; sym._strcmp ; int strcmp(const char *s1, const char *s2)
│           0x004013c0      mov   dword [var_ch], eax
│           0x004013c7      cmp   dword [var_ch], 0
│       ┌─< 0x004013cf      jne   0x4013df
│       │   0x004013d1      mov   dword [esp], data.0040303c           ; [0x40303c:4]=0x214b4f ; "OK!" ; const char *s
│       │   0x004013d8      call  sym._puts                            ; sym._puts ; int puts(const char *s)
│      ┌──< 0x004013dd      jmp   0x4013eb
│      │└─> 0x004013df      mov   dword [esp], str.Not_ok              ; [0x403040:4]=0x20746f4e ; "Not ok!" ; const char *s
│      │    0x004013e6      call  sym._puts                            ; sym._puts ; int puts(const char *s)
│      │    ; CODE XREF from main @ 0x4013dd
│      └──> 0x004013eb      call  sym.__getch                          ; sym.__getch
│           0x004013f0      mov   eax, 0
│           0x004013f5      mov   edi, dword [var_8h]
│           0x004013f8      leave
└           0x004013f9      ret

And here is the `pdg` output:

// WARNING: Variable defined which should be unmapped: var_8h

undefined4 main(void)
{
    int32_t iVar1;
    undefined4 *puVar2;
    char *s2;
    undefined4 uStack_dc;
    undefined4 uStack_d8;
    char s1 [100];
    char var_70h [100];
    int var_ch;
    int32_t var_8h;

    sym.___main();
    sym._printf("Input the password: ");
    sym._scanf(data.00403039, s1 + 0x5c);
    uStack_dc = 0x73373030;
    uStack_d8 = 0x72657075;
    s1[0] = 's';
    s1[1] = 'p';
    s1[2] = 'y';
    s1[3] = '\0';
    puVar2 = (undefined4 *)(s1 + 4);
    for (iVar1 = 0x16; iVar1 != 0; iVar1 = iVar1 + -1) {
        *puVar2 = 0;
        puVar2 = puVar2 + 1;
    }
    iVar1 = sym._strcmp(&uStack_dc, s1 + 0x5c);
    if (iVar1 == 0) {
        sym._puts(data.0040303c);
    } else {
        sym._puts("Not ok!");
    }
    sym.__getch();
    return 0;
}

As you can see, s1 starts at stack - 0xd4 and then gets the string value assigned to it as dwords, but the decompiler only attributes one of those stores to s1 (and even that one at the wrong index). It also gets the var_70h variable wrong, which should be passed to the strcmp and scanf calls. I'm not sure what exactly goes wrong, but the Ghidra decompiler doesn't use the same variables as rizin. If I analyze the crackme with Ghidra itself and edit the variable types in the same way, Ghidra displays the function the way I would expect.

exe, password is crackmes.one