capstone-engine / capstone

Capstone disassembly/disassembler framework for ARM, ARM64 (ARMv8), Alpha, BPF, Ethereum VM, HPPA, LoongArch, M68K, M680X, Mips, MOS65XX, PPC, RISC-V(rv32G/rv64G), SH, Sparc, SystemZ, TMS320C64X, TriCore, Webassembly, XCore and X86.
http://www.capstone-engine.org
7.61k stars 1.56k forks source link

Python binding lost mem operands of arm64 Insn by capstone v5.0.1 #2286

Open DiamondHunters opened 8 months ago

DiamondHunters commented 8 months ago

It's similar to https://github.com/capstone-engine/capstone/issues/2260: cstool and the Python binding are the same version but give different results.

The Python binding API loses the mem operand. Python code (modified from the Capstone arm64 test):

from __future__ import print_function

import capstone
from capstone import *
from capstone.arm64 import *

# Raw code bytes handed to the disassembler, written as a hex string.
ARM64_CODE = bytes.fromhex("2bb94239")

# Test table: each entry is (arch, mode, code, comment), unpacked in that
# order by test_class().
all_tests = (
        (CS_ARCH_ARM64, CS_MODE_ARM, ARM64_CODE, "ARM-64"),
        )

def print_insn_detail(insn):
    """Print one disassembled instruction followed by its detail fields.

    Always prints the address, mnemonic and operand string. Unless
    ``insn.id`` is 0, it then prints every entry of ``insn.operands``
    (type-specific value, shift, extender, vector arrangement, vector
    index and access mode), the write-back / condition-code / update-flags
    attributes, and the register lists returned by ``insn.regs_access()``.

    Args:
        insn: instruction object as yielded by ``md.disasm(...)`` in
            ``test_class``; it must expose the attributes read below.
    """
    # print address, mnemonic and operands
    print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))

    # "data" instruction generated by SKIPDATA option has no detail
    if insn.id == 0:
        return

    operands = insn.operands
    if len(operands) > 0:
        print("\top_count: %u" % len(operands))
        for idx, op in enumerate(operands):
            if op.type == ARM64_OP_REG:
                print("\t\toperands[%u].type: REG = %s" % (idx, insn.reg_name(op.reg)))
            if op.type == ARM64_OP_IMM:
                # hex() already emits the "0x" prefix (and a leading "-" for
                # negative values), so the format string must not add another.
                print("\t\toperands[%u].type: IMM = %s" % (idx, hex(op.imm)))
            if op.type == ARM64_OP_CIMM:
                print("\t\toperands[%u].type: C-IMM = %u" % (idx, op.imm))
            if op.type == ARM64_OP_FP:
                print("\t\toperands[%u].type: FP = %f" % (idx, op.fp))
            if op.type == ARM64_OP_MEM:
                print("\t\toperands[%u].type: MEM" % idx)
                # A zero base / index / displacement is treated as "absent".
                if op.mem.base != 0:
                    print("\t\t\toperands[%u].mem.base: REG = %s"
                          % (idx, insn.reg_name(op.mem.base)))
                if op.mem.index != 0:
                    print("\t\t\toperands[%u].mem.index: REG = %s"
                          % (idx, insn.reg_name(op.mem.index)))
                if op.mem.disp != 0:
                    # Formatted with the built-in hex(): the to_x helper the
                    # original line called is not defined in this script.
                    print("\t\t\toperands[%u].mem.disp: %s"
                          % (idx, hex(op.mem.disp)))
            if op.type == ARM64_OP_REG_MRS:
                print("\t\toperands[%u].type: REG_MRS = 0x%x" % (idx, op.reg))
            if op.type == ARM64_OP_REG_MSR:
                print("\t\toperands[%u].type: REG_MSR = 0x%x" % (idx, op.reg))
            if op.type == ARM64_OP_PSTATE:
                print("\t\toperands[%u].type: PSTATE = 0x%x" % (idx, op.pstate))
            if op.type == ARM64_OP_SYS:
                print("\t\toperands[%u].type: SYS = 0x%x" % (idx, op.sys))
            if op.type == ARM64_OP_PREFETCH:
                print("\t\toperands[%u].type: PREFETCH = 0x%x" % (idx, op.prefetch))
            if op.type == ARM64_OP_BARRIER:
                print("\t\toperands[%u].type: BARRIER = 0x%x" % (idx, op.barrier))

            if op.shift.type != ARM64_SFT_INVALID and op.shift.value:
                print("\t\t\tShift: type = %u, value = %u" % (op.shift.type, op.shift.value))

            if op.ext != ARM64_EXT_INVALID:
                print("\t\t\tExt: %u" % op.ext)

            if op.vas != ARM64_VAS_INVALID:
                print("\t\t\tVector Arrangement Specifier: 0x%x" % op.vas)

            # if op.vess != ARM64_VESS_INVALID:
            #     print("\t\t\tVector Element Size Specifier: %u" % op.vess)

            if op.vector_index != -1:
                print("\t\t\tVector Index: %u" % op.vector_index)

            if op.access == CS_AC_READ:
                print("\t\toperands[%u].access: READ\n" % (idx))
            elif op.access == CS_AC_WRITE:
                print("\t\toperands[%u].access: WRITE\n" % (idx))
            elif op.access == (CS_AC_READ | CS_AC_WRITE):
                print("\t\toperands[%u].access: READ | WRITE\n" % (idx))

    if insn.writeback:
        print("\tWrite-back: True")
    if insn.cc not in [ARM64_CC_AL, ARM64_CC_INVALID]:
        print("\tCode-condition: %u" % insn.cc)
    if insn.update_flags:
        print("\tUpdate-flags: True")

    (regs_read, regs_write) = insn.regs_access()

    # Each list is printed on a single line: a label, then " <name>" per register.
    if len(regs_read) > 0:
        print("\tRegisters read:", end="")
        for r in regs_read:
            print(" %s" % (insn.reg_name(r)), end="")
        print("")

    if len(regs_write) > 0:
        print("\tRegisters modified:", end="")
        for r in regs_write:
            print(" %s" % (insn.reg_name(r)), end="")
        print("")

# ## Test class Cs
def test_class():
    """Disassemble every entry of ``all_tests`` and print the details.

    For each (arch, mode, code, comment) entry a ``Cs`` object is created
    with detail enabled, every instruction is passed to
    ``print_insn_detail``, and finally the address just past the last
    decoded instruction is printed. A ``CsError`` is reported as an
    ``ERROR:`` line instead of aborting the run.
    """
    for (arch, mode, code, comment) in all_tests:
        print("*" * 16)
        print("Platform: %s" % comment)
        print("Code: %s" % (code.hex()))
        print("Disasm:")

        try:
            md = Cs(arch, mode)
            md.detail = True
            # Track the last instruction explicitly: reading the loop variable
            # after the loop fails with an unbound name when nothing was
            # decoded, or reuses an instruction from a previous test entry.
            last_insn = None
            for insn in md.disasm(code, 0):
                print_insn_detail(insn)
                print ()
                last_insn = insn
            if last_insn is not None:
                print("0x%x:\n" % (last_insn.address + last_insn.size))
        except CsError as e:
            print("ERROR: %s" % e)

# Script entry point: report the binding's version string, then run the test.
if __name__ == '__main__':
    print("version :", capstone.__version__)
    test_class()
Output:

version : 5.0.1
****************
Platform: ARM-64
Code: 2bb94239
Disasm:
0x0:    ldrb    w11, [x9, #0xae]
    op_count: 1
        operands[0].type: REG = w11
        operands[0].access: WRITE

    Registers modified: w11

0x4:

but cstool gave the correct result:

$ cstool                  
Cstool for Capstone Disassembler Engine v5.0.1

$ cstool -d arm64 2bb94239
 0  2b b9 42 39  ldrb   w11, [x9, #0xae]
        ID: 561 (ldrb)
        op_count: 2
                operands[0].type: REG = w11
                operands[0].access: WRITE
                operands[1].type: MEM
                        operands[1].mem.base: REG = x9
                        operands[1].mem.disp: 0xae
                operands[1].access: READ
        Registers read: x9
        Registers modified: w11
DiamondHunters commented 8 months ago

Library load, checked with `DYLD_PRINT_LIBRARIES=1 python -c 'import capstone'`: `dyld[18101]: <81B692C8-CFD7-3ADD-842D-AA9DFA176748> [delete]/.venv11/lib/python3.11/site-packages/capstone/lib/libcapstone.dylib`. The `cs_version` of this library (decompiled by IDA):

/*
 * IDA decompilation of cs_version() from the loaded libcapstone.dylib.
 * a1 and a2 are out-parameters (presumably the major and minor version
 * numbers -- confirm against the Capstone headers).
 */
__int64 __fastcall cs_version(_DWORD *a1, _DWORD *a2)
{
  /* The out-parameters are written only when both pointers are non-NULL. */
  if ( a1 && a2 )
  {
    *a1 = 5;
    *a2 = 0;
  }
  /* 1280 == 0x500 == (5 << 8) | 0, i.e. the two values above packed together. */
  return 1280LL;
}

I also tried forcing Python to load the libcapstone.5.dylib that is bundled with cstool and got the same result, so I think the Python binding uses the correct library but still gives the wrong result. That's so weird.

DiamondHunters commented 8 months ago

The next branch gives the correct result, but a lot of symbols were changed, so refactoring the code would be a lot of work.

version : 5.0.0
****************
Platform: ARM-64
Code: 2bb94239
Disasm:
0x0:    ldrb    w11, [x9, #0xae]
    op_count: 2
        operands[0].type: REG = w11
        operands[0].access: WRITE

        operands[1].type: MEM
            operands[1].mem.base: REG = x9
            operands[1].mem.disp: 0x0xae
        operands[1].access: READ

    Registers read: x9
    Registers modified: w11

0x4:
Rot127 commented 8 months ago

If you want to use next branch, you can refer to the release guide for v6 (see "Note about AArch64" section). There we document how to use the meta-programming macros to make the refactor easier.

DiamondHunters commented 8 months ago

release guide for v6

thanks