bdcht / amoco

yet another tool for analysing binaries
GNU General Public License v2.0
462 stars 65 forks source link

X86 asm parser #2 #83

Closed LRGH closed 4 years ago

LRGH commented 4 years ago

Thank you. Another one that is currently broken.

from amoco.arch.core import type_data_processing, \
                            type_control_flow, \
                            type_other, \
                            type_cpu_state, \
                            type_undefined
from amoco.arch.x86 import cpu_x86, spec_ia32, spec_fpu, spec_sse
from amoco.arch.x86.formats import mnemo_string_rep
from amoco.arch.x86.env import *
from amoco.arch.x86.parsers import att_syntax
from amoco.ui import render

# Format instructions with the binutils AT&T formatter and select the
# 'Null' UI formatter.
cpu_x86.instruction_x86.set_formatter(cpu_x86.IA32_Binutils_ATT)
render.conf.UI.formatter = 'Null'

# Index the ia32, fpu and sse specs by mnemonic: each key maps to the list
# of distinct specs sharing that mnemonic, in the order they are encountered.
# Specs without a 'mnemonic' attribute are grouped under the key None.
spec_table = {}
for spec in spec_ia32.ISPECS + spec_fpu.ISPECS + spec_sse.ISPECS:
    mnemo = spec.iattr.get('mnemonic')
    if spec not in spec_table.setdefault(mnemo, []):
        spec_table[mnemo].append(spec)
del spec

# Pick a spec for instruction `i` out of `spec_table` (mnemonic -> list of
# specs) and store it in `i.spec`; `i.type` is then taken from the spec's
# 'type' attribute, falling back to type_data_processing.
def set_spec(i, spec_table):
    # Mnemonics whose spec list is looked up under another mnemonic's key.
    spec_collision = {
        'CBW': 'CWDE',
        'CWD': 'CDQ',
        'IRET': 'IRETD',
        'CDQE': 'CWDE',
        'CQO': 'CDQ',
        'LFENCE': 'XRSTOR',
        'MFENCE': 'XSAVEOPT',
        'SFENCE': 'CLFLUSH',
        'PEXTRQ': 'PEXTRD',
        'PINSRQ': 'PINSRD',
        'CMPXCHG16B': 'CMPXCHG8B',
        }
    if i.mnemonic in spec_collision:
        spec_list = spec_table[spec_collision[i.mnemonic]]
    elif i.mnemonic[:-1].lower() in mnemo_string_rep:
        # Mnemonic minus its last character is listed in mnemo_string_rep:
        # look the specs up under that stem with a 'D' suffix.
        spec_list = spec_table[i.mnemonic[:-1]+'D']
    else:
        spec_list = spec_table[i.mnemonic]
    # Index of the spec to use within spec_list; 0 unless overridden below.
    ispec_idx = 0
    if i.mnemonic in ('CALL','JMP'):
        # Memory operands and non-label register operands use the first
        # spec; any other operand uses the second one.
        if i.operands[0]._is_mem:
            ispec_idx = 0
        elif i.operands[0]._is_reg and not i.operands[0]._is_lab:
            ispec_idx = 0
        else:
            ispec_idx = 1
    if i.mnemonic.lower()[:-1] in mnemo_string_rep:
        if not len(i.operands):
            # NOTE(review): the body of this `if` is missing from the paste,
            # so the snippet does not parse as posted. The complete
            # definition given later in this thread has `ispec_idx = -1` here.
    i.spec = spec_list[ispec_idx]
    if 'type' in i.spec.iattr:
        i.type = i.spec.iattr['type']
    else:
        i.type = type_data_processing

# Reproducer: parse a call to a label with the AT&T parser, attach a spec
# with set_spec, then convert the instruction back to a string.
s = "call foo"
i = att_syntax.instr.parseString(s)[0]  # first parse result is the instruction
assert i.mnemonic == 'CALL'
set_spec(i, spec_table)  # fills in i.spec and i.type
assert str(i) == 'call      foo'

This is because foo is of type amoco.cas.expressions.lab and not a register.

You may notice that I wrote this set_spec function so that str(i) works when i is created from assembly. I don't know whether it would break anything, but it might be useful to have parseString always call set_spec.

bdcht commented 4 years ago

Thanks. Fixed by commit 9b168fd.

I will try to add set_spec to the x86.parsers module, but there seem to be missing lines related to the mnemo_string_rep case (or at least messed-up indentation).

LRGH commented 4 years ago

Indeed, copy-paste error. Here is the full definition (one line was missing).

def set_spec(i, spec_table):
    """Attach a spec and a type to the instruction *i*.

    *spec_table* maps a mnemonic to a list of specs. The list is chosen
    from the instruction's mnemonic, one entry of it is stored in
    ``i.spec``, and ``i.type`` is set from that spec's ``'type'``
    attribute (``type_data_processing`` when the spec has none).

    Raises ``KeyError`` when the lookup key is absent from *spec_table*
    and ``IndexError`` when the chosen list has no entry at the selected
    index.
    """
    # Mnemonics whose specs are stored under another mnemonic's key.
    aliases = {
        'CBW': 'CWDE',
        'CWD': 'CDQ',
        'IRET': 'IRETD',
        'CDQE': 'CWDE',
        'CQO': 'CDQ',
        'LFENCE': 'XRSTOR',
        'MFENCE': 'XSAVEOPT',
        'SFENCE': 'CLFLUSH',
        'PEXTRQ': 'PEXTRD',
        'PINSRQ': 'PINSRD',
        'CMPXCHG16B': 'CMPXCHG8B',
    }

    # Step 1: work out which key of spec_table holds the candidate specs.
    if i.mnemonic in aliases:
        key = aliases[i.mnemonic]
    elif i.mnemonic[:-1].lower() in mnemo_string_rep:
        # Stem (mnemonic minus its last character) is listed in
        # mnemo_string_rep: the specs live under the stem + 'D'.
        key = i.mnemonic[:-1] + 'D'
    else:
        key = i.mnemonic
    candidates = spec_table[key]

    # Step 2: choose one candidate. The first one is the default.
    choice = 0
    if i.mnemonic in ('CALL', 'JMP'):
        target = i.operands[0]
        # Memory operands and registers that are not labels keep the first
        # spec; every other operand kind selects the second one.
        keeps_first = target._is_mem or (target._is_reg and not target._is_lab)
        if not keeps_first:
            choice = 1
    if i.mnemonic.lower()[:-1] in mnemo_string_rep and len(i.operands) == 0:
        # Operand-less form of a mnemonic_string_rep instruction: use the
        # last spec of the list.
        choice = -1

    # Step 3: record the result on the instruction.
    i.spec = candidates[choice]
    i.type = (i.spec.iattr['type'] if 'type' in i.spec.iattr
              else type_data_processing)
bdcht commented 4 years ago

Instructions' specs now avoid collisions, so spec_collision is no longer required: every mnemonic now has its own spec. En route to defining the assembler (which relies on defining spec.encode)... The set_spec function has been added to x86.parsers but is not yet used by action_instr.