JonathanSalwan / Triton

Triton is a dynamic binary analysis library. Build your own program analysis tools, automate your reverse engineering, perform software verification or just emulate code.
https://triton-library.github.io
Apache License 2.0
3.4k stars 524 forks source link

Incorrect IDIV Instruction Emulation #1342

Closed hoangprod closed 2 days ago

hoangprod commented 4 days ago

The micro-code implementation of `idiv` is incorrect: it returns a wrong result compared with the result produced by both Unicorn and real CPU execution. Below is a PoC.

from triton import *
from unicorn import *
from unicorn.x86_const import *

# Machine code under test (x86-64), listed as offset: bytes -> instruction.
#   0x00: 48 c7 c0 31 ee 28 4b             mov    rax, 0x4b28ee31
#   0x07: 49 b9 69 75 08 b2 00 00 00 00    movabs r9, 0xb2087569
#   0x11: 41 f6 f9                         idiv   r9b
AsmCode = bytes.fromhex(
    "48c7c031ee284b"        # mov    rax, 0x4b28ee31
    "49b9697508b200000000"  # movabs r9, 0xb2087569
    "41f6f9"                # idiv   r9b
)

def create_triton_ctx() -> TritonContext:  # type: ignore
    """
    Build and return a new Triton context for x86-64.

    After the architecture is selected, the context is configured with
    ALIGNED_MEMORY enabled, ONLY_ON_SYMBOLIZED disabled, and the AST
    representation mode set to PYTHON.
    """
    context = TritonContext()  # type: ignore
    context.setArchitecture(ARCH.X86_64)  # type: ignore

    # Apply the mode switches in a fixed order: (mode, enabled).
    for mode, enabled in (
        (MODE.ALIGNED_MEMORY, True),  # type: ignore
        (MODE.ONLY_ON_SYMBOLIZED, False),  # type: ignore
    ):
        context.setMode(mode, enabled)

    context.setAstRepresentationMode(AST_REPRESENTATION.PYTHON)  # type: ignore
    return context

def triton_idiv8():
    """
    Emulate AsmCode with Triton and print the final r9 and rax values.

    The code is written to address 0x1000 of a fresh context and processed
    one instruction at a time; each processed instruction is printed.
    """
    ctx = create_triton_ctx()

    base = 0x1000
    ctx.setConcreteMemoryAreaValue(base, AsmCode)

    end = base + len(AsmCode)
    pc = base
    while pc < end:
        insn = Instruction()
        # Hand Triton a 16-byte window starting at pc; the size of the
        # instruction it decodes is what advances pc below.
        insn.setOpcode(ctx.getConcreteMemoryAreaValue(pc, 16))
        insn.setAddress(pc)
        ctx.processing(insn)
        print(insn)
        pc += insn.getSize()

    regs = ctx.registers
    r9_val = ctx.getConcreteRegisterValue(regs.r9)
    rax_val = ctx.getConcreteRegisterValue(regs.rax)
    print(f"Triton: r9: {hex(r9_val)} rax: {hex(rax_val)}")

def unicorn_idiv8():
    """
    Emulate AsmCode with Unicorn and print the final r9 and rax values.

    A 0x1000-byte region is mapped at address 0x1000, the code is written
    to its start, and emulation runs from there to the end of the code.
    """
    base = 0x1000

    emulator = Uc(UC_ARCH_X86, UC_MODE_64)
    emulator.mem_map(base, 0x1000)
    emulator.mem_write(base, AsmCode)

    # Stop address is the first byte past the last instruction.
    emulator.emu_start(base, base + len(AsmCode))

    r9 = emulator.reg_read(UC_X86_REG_R9)
    rax = emulator.reg_read(UC_X86_REG_RAX)
    print(f"Unicorn: r9: {hex(r9)} rax: {hex(rax)}")

if __name__ == "__main__":
    # Run the Triton emulation first, then the Unicorn one, so the two
    # printed results can be compared.
    for run_poc in (triton_idiv8, unicorn_idiv8):
        run_poc()

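You can also see the correct result here: rax should be 0x4B28D4D5 (1260967125), not 0x4b283dd5. With AX = 0xEE31 (-4559 as a signed 16-bit value) and r9b = 0x69 (105), `idiv r9b` must produce the quotient -43 (AL = 0xD5) and the remainder -44 (AH = 0xD4). https://godbolt.org/z/GW6zo4xaq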

In summary, executing this assembly below

# 0:  48 c7 c0 31 ee 28 4b    mov    rax,0x4b28ee31
# 7:  49 b9 69 75 08 b2 00    movabs r9,0xb2087569
# e:  00 00 00
# 11: 41 f6 f9                idiv   r9b 

produces different results in Unicorn and Triton; Unicorn's result is the correct one, since it matches real CPU execution.

Triton: r9: 0xb2087569 rax: 0x4b283dd5 Unicorn: r9: 0xb2087569 rax: 0x4b28d4d5