NationalSecurityAgency / ghidra

Ghidra is a software reverse engineering (SRE) framework
https://www.nsa.gov/ghidra
Apache License 2.0
49.29k stars 5.67k forks source link

Ghidra incorrectly analyzes Borland C++ generated switches #6694

Open NancyAurum opened 1 week ago

NancyAurum commented 1 week ago

Describe the bug Borland C++ generates switches in form of JMP word ptr CS:[BX + CaseTable], where CaseTable is placed at the end of a function, containing the switch, and holds CS relative offsets (1 for each case) inside the function. Ghidra ignores that table and generates completely unrelated references far away from the function.

To Reproduce Steps to reproduce the behavior:

  1. Analyze the attached st.exe with the default settings.
  2. Press G and enter 4b0a:09e5

Other switches there follow the same scheme and are incorrectly analyzed too.

Expected behavior Ghidra uses the table properly to generate navigate around the switch.

Attachments st.zip

Environment (please complete the following information):

Additional context The work around is clearing existing references by the jump's address, and then manually adding a set of COMPUTED_JUMP references. The one runs SwitchOverride.java at the jump's address.

It would be nice if Ghidra did some sanity checks, that the jump target lies inside the function or anywhere near it. And if anything is suspicious, just asking user for further input, mentioning that SwitchOverride.java script.

LukeSerne commented 1 week ago

This bug occurs because the address of the jump table is not determined correctly.

This happens because there is a mismatch between how the segment pcode op is implemented, and the pcode for the JMP instruction. The segment(a, b) pcode op returns the address (a << 4) + b. However, the pcode for the JMP word ptr CS:[BX + 0xb69] instruction is:

       4b0a:09e5 2e ff a7 69 0b       JMP        word ptr CS:[BX + 0xb69]
                                                      $U3e00:4 = INT_RIGHT 0x4ba8a:4, 4:4
                                                      $U3f00:4 = INT_AND $U3e00:4, 0xf000:4
                                                      CS = SUBPIECE $U3f00:4, 0:4
                                                      $U2e80:2 = INT_ADD BX, 0xb69:2
                                                      $U3e00:4 = INT_RIGHT 0x4ba8a:4, 4:4
                                                      $U3f00:4 = INT_AND $U3e00:4, 0xf000:4
                                                      CS = SUBPIECE $U3f00:4, 0:4
                                                      $U4a80:4 = CALLOTHER "segment", CS, $U2e80:2
                                                      $U9200:2 = LOAD ram($U4a80:4)
                                                      $U29a80:4 = CALLOTHER "segment", CS, $U9200:2
                                                      BRANCHIND $U29a80:4

We would expect CS to have the value 0x4b0a, but here, it's updated (twice?!). The root cause seems to be the definition of currentCS in ia.sinc. This macro uses the 32-bit address of the next instruction to calculate the current value of CS. Since inst_next == (CS << 4) + offset, this macro sets CS to (inst_next >> 4) & 0xF000 == (((CS << 4) + offset) >> 4) & 0xF000. However, without knowing offset, it's impossible to determine what the real value of CS is. However, CS is set to the correct value at the start of the function. As such, it seems best to not change it, and have the currentCS macro just export the CS varnode.

https://github.com/NationalSecurityAgency/ghidra/blob/a1db2dac166973a381e7a98630bc11901f47d2d2/Ghidra/Processors/x86/data/languages/ia.sinc#L1049-L1050

The only other location the CS varnode is changed, is in the ptr1616 and ptr1632 macros. I don't really understand how real-mode 16-bit x86 works, so I assumed these updates to CS are necessary. These macros are used for the CALLF and JMPF instruction. In the CALLF instruction, the control flow will return from the call, so we should restore the CS value afterwards. The JMPF will not return, and as such, we don't need to restore the CS value.

https://github.com/NationalSecurityAgency/ghidra/blob/a1db2dac166973a381e7a98630bc11901f47d2d2/Ghidra/Processors/x86/data/languages/ia.sinc#L1379-L1381

As said before, I barely have any experience with real-mode 16-bit x86, so I'm not sure how well these changes actually model reality, or if it's just an ugly hack that happens to work decently well for this one sample. Anyway, I pasted the patch I applied below.

diff --git a/Ghidra/Processors/x86/data/languages/ia.sinc b/Ghidra/Processors/x86/data/languages/ia.sinc
index dd8c841131..1bf5513a0c 100644
--- a/Ghidra/Processors/x86/data/languages/ia.sinc
+++ b/Ghidra/Processors/x86/data/languages/ia.sinc
@@ -1046,8 +1046,8 @@ addr64: [Base64 + Index64*ss + simm32_64] is mod=2 & r_m=4; Index64 & Base64 & s
 addr64: [Base64 + Index64*ss]                  is mod=2 & r_m=4; Index64 & Base64 & ss; imm32=0   { local tmp=Base64+Index64*ss; export tmp; }
 @endif

-currentCS: CS is protectedMode=0 & CS { tmp:4 = (inst_next >> 4) & 0xf000; CS = tmp:2; export CS; }
-currentCS: CS is protectedMode=1 & CS { tmp:4 = (inst_next >> 16) & 0xffff; CS = tmp:2; export CS; }
+currentCS: CS is protectedMode=0 & CS { export CS; }
+currentCS: CS is protectedMode=1 & CS { export CS; }

 segWide: is segover=0                  { export 0:$(SIZE); }
 segWide: CS: is segover=1 & CS { export 0:$(SIZE); }
@@ -2526,10 +2526,10 @@ with : lockprefx=0 {
 @endif

 # direct far calls generate an opcode undefined exception in x86-64
-:CALLF ptr1616      is vexMode=0 & addrsize=0 & opsize=0 & byte=0x9a; ptr1616           { push22(CS); build ptr1616; push22(&:2 inst_next); call ptr1616; }
-:CALLF ptr1616      is vexMode=0 & addrsize=1 & opsize=0 & byte=0x9a; ptr1616           { push42(CS); build ptr1616; push42(&:2 inst_next); call ptr1616; }
-:CALLF ptr1632      is vexMode=0 & addrsize=0 & opsize=1 & byte=0x9a; ptr1632           { push22(CS); build ptr1632; push24(&:4 inst_next); call ptr1632; }
-:CALLF ptr1632      is vexMode=0 & addrsize=1 & opsize=1 & byte=0x9a; ptr1632           { pushseg44(CS); build ptr1632; push44(&:4 inst_next); call ptr1632; }
+:CALLF ptr1616      is vexMode=0 & addrsize=0 & opsize=0 & byte=0x9a; ptr1616           { tmp:2 = CS; push22(CS); build ptr1616; push22(&:2 inst_next); call ptr1616; CS = tmp; }
+:CALLF ptr1616      is vexMode=0 & addrsize=1 & opsize=0 & byte=0x9a; ptr1616           { tmp:2 = CS; push42(CS); build ptr1616; push42(&:2 inst_next); call ptr1616; CS = tmp; }
+:CALLF ptr1632      is vexMode=0 & addrsize=0 & opsize=1 & byte=0x9a; ptr1632           { tmp:2 = CS; push22(CS); build ptr1632; push24(&:4 inst_next); call ptr1632; CS = tmp; }
+:CALLF ptr1632      is vexMode=0 & addrsize=1 & opsize=1 & byte=0x9a; ptr1632           { tmp:2 = CS; pushseg44(CS); build ptr1632; push44(&:4 inst_next); call ptr1632; CS = tmp; }
 :CALLF addr16       is vexMode=0 & addrsize=0 & opsize=0 & byte=0xff; addr16 & reg_opcode=3 ... { local ptr:$(SIZE) = segment(DS,addr16); local addrptr:$(SIZE) = segment(*:2 (ptr+2),*:2 ptr);
                                                                                                   push22(CS); push22(&:2 inst_next); call [addrptr]; }
 :CALLF addr32       is vexMode=0 & addrsize=1 & opsize=0 & byte=0xff; addr32 & reg_opcode=3 ... { local dest:4 = addr32; push42(CS); push42(&:2 inst_next); call [dest]; }

With this patch applied, the pcode for the JMP instruction looks much more like I expected.

       4b0a:09e5 2e ff a7 69 0b       JMP        word ptr CS:[BX + 0xb69]
                                                      $U2e80:2 = INT_ADD BX, 0xb69:2
                                                      $U4680:4 = CALLOTHER "segment", CS, $U2e80:2
                                                      $U8e00:2 = LOAD ram($U4680:4)
                                                      $U29880:4 = CALLOTHER "segment", CS, $U8e00:2
                                                      BRANCHIND $U29880:4

Additionally, Ghidra is now able to correctly identify the jumptable, and it also correctly identifies the code for the switch cases. As such, the listing view also looks better:

the listing view looks better now...

And the decompiler is also able to correctly recover the switch cases. The decompiled code looks correct at a first glance. ... and the decompiler is able to correctly recover the switch cases!

NancyAurum commented 1 week ago

Yeah! looks much better than the SwitchOverride.java Many thanks! But there is a strange thing... that function with the switch (see below) saves a graphics file, switching on its type (1st byte in the stg_t structure). So I don't really get why it now rshifts param_1 by 0x10, since for the ES:BX far_t pointer such shift returns the segment.

The late 16bit x86 was all about using ES:BX and DS:DI as pointers to data structures or arrays, while using DX:AX and CX:BX to store 32bit integers, and bank-switching the EMS/XMS into the 640k (i.e. it was about processing inherently 32bit data with 16bits). For that st.exe, the Borland C++ compiler assigned each *.C file a separate CS segment, so calls inside a C file were near and calls into another .C were far, and therefore it was possible to segment-swap/overlay each C file (overlay manager actually walked the stack frames, patching returns), freeing large portion of memory for data. Additionally, the entire C library was usually combined into a single segment. Otherwise it is similar to 32bit x86.

stg_t * stshStoreStg(stg_t *gfx)

{
  uint uVar1;
  undefined2 uVar2;
  uint16_t ofs_lo;
  cte_t *psVar4;
  bool bVar4;
  far_t fVar5;
  far_t ptr_base;
  cte_t *pcVar6;
  ulong b;
  DW sz_00;
  DW sz;

  if (5 < gfx->type) {
    /* WARNING: Subroutine does not return */
    stExit(0x18);
  }
    /* WARNING: Switch is manually overridden */
  switch((undefined4)*(undefined2 *)((uint)gfx->type * 2 + 0xb69)) {
  case 0x4ba94:
    return gfx;
  case 0x4ba9d:
    sz.lo = *(uint *)&((stg_t *)gfx)->sz + 9;
    sz.hi = *(int *)((int)&((stg_t *)gfx)->sz + 2) + (uint)(0xfff6 < *(uint *)&((stg_t *)gfx)->sz);
    break;
  case 0x4bab6:
    uVar1 = *(uint *)((int)&((stg_t *)gfx)->w + 1);
    sz.lo = uVar1 + 0xe;
    sz.hi = *(int *)((int)&((stg_t *)gfx)->h + 1) + (uint)(0xfff1 < uVar1);
    break;
  case 0x4bacf:
    sz.lo = ((stg_t *)gfx)->w + 0xd;
    sz.hi = 0;
    break;
  case 0x4bae3:
    uVar1 = *(uint *)((int)&((stg_t *)gfx)[1].sz + 2);
    sz.lo = uVar1 + 0x14;
    sz.hi = ((stg_t *)gfx)[1].w + (uint)(0xffeb < uVar1);
  }
  if (StshCount < 0xd93) {
    b = (ulong)StshCount;
    StshCount = StshCount + 1;
    fVar5 = (far_t)LXMUL@(0x15,b);
    ptr_base.seg = StshCtPtr.seg;
    ptr_base.ofs = StshCtPtr.ofs;
    fVar5 = PADD@(ptr_base,fVar5);
    pcVar6 = (cte_t *)mkp(fVar5.ofs,fVar5.seg);
    psVar4 = (cte_t *)pcVar6;
    uVar2 = (undefined2)((ulong)pcVar6 >> 0x10);
    (psVar4->sz).hi = sz.hi;
    (&psVar4->sz)->lo = sz.lo;
    pcVar6->state = 0x0;
    ofs_lo = StshCtOfs.lo;
    (psVar4->ofs).hi = StshCtOfs.hi;
    (&psVar4->ofs)->lo = ofs_lo;
    *(undefined2 *)(psVar4->info + 10) = 0;
    *(undefined2 *)(psVar4->info + 8) = 0;
    bVar4 = CARRY2(StshCtOfs.lo,sz.lo);
    StshCtOfs.lo = StshCtOfs.lo + sz.lo;
    StshCtOfs.hi = StshCtOfs.hi + sz.hi + (uint)bVar4;
    *(undefined2 *)(psVar4->info + 2) = 0;
    *(undefined2 *)psVar4->info = 0;
    *(undefined2 *)(psVar4->info + 6) = 0;
    *(undefined2 *)(psVar4->info + 4) = 0;
    if (DebugVerbose != '\0') {
      printf((char *)"type %d len %ld \r",(uint)gfx->type);
      biosReadKey();
    }
    sz_00.hi = sz.hi;
    sz_00.lo = sz.lo;
    stshWriteFile(StshFile,(far_t)gfx,sz_00);
    stshFree((far_t)gfx);
    /* return the contents table entry pointer */
    return (stg_t *)pcVar6;
  }
    /* WARNING: Subroutine does not return */
  stExit(0x24);
}