Open honggyukim opened 4 years ago
https://github.com/qemu/qemu/blob/3979fca4b69fc31c372687cd0bb6950592f248bd/disas/arm.c#L3867-L3965
$ cat qemu/disas/arm.c
...
/* NOTE: There are no checks in these routines that
the relevant number of data bytes exist. */
int
print_insn_arm (bfd_vma pc, struct disassemble_info *info)
{
unsigned char b[4];
long given;
int status;
int is_thumb = false;
int is_data = false;
unsigned int size = 4;
void (*printer) (bfd_vma, struct disassemble_info *, long);
int little;
little = (info->endian == BFD_ENDIAN_LITTLE);
is_thumb |= (pc & 1);
pc &= ~(bfd_vma)1;
if (force_thumb)
is_thumb = true;
info->bytes_per_line = 4;
if (is_data)
{
int i;
/* size was already set above. */
info->bytes_per_chunk = size;
printer = print_insn_data;
status = info->read_memory_func (pc, (bfd_byte *)b, size, info);
given = 0;
if (little)
for (i = size - 1; i >= 0; i--)
given = b[i] | (given << 8);
else
for (i = 0; i < (int) size; i++)
given = b[i] | (given << 8);
}
else if (!is_thumb)
{
/* In ARM mode endianness is a straightforward issue: the instruction
is four bytes long and is either ordered 0123 or 3210. */
printer = print_insn_arm_internal;
info->bytes_per_chunk = 4;
size = 4;
status = info->read_memory_func (pc, (bfd_byte *)b, 4, info);
if (little)
given = (b[0]) | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);
else
given = (b[3]) | (b[2] << 8) | (b[1] << 16) | (b[0] << 24);
}
else
{
/* In Thumb mode we have the additional wrinkle of two
instruction lengths. Fortunately, the bits that determine
the length of the current instruction are always to be found
in the first two bytes. */
printer = print_insn_thumb16;
info->bytes_per_chunk = 2;
size = 2;
status = info->read_memory_func (pc, (bfd_byte *)b, 2, info);
if (little)
given = (b[0]) | (b[1] << 8);
else
given = (b[1]) | (b[0] << 8);
if (!status)
{
/* These bit patterns signal a four-byte Thumb
instruction. */
if ((given & 0xF800) == 0xF800
|| (given & 0xF800) == 0xF000
|| (given & 0xF800) == 0xE800)
{
status = info->read_memory_func (pc + 2, (bfd_byte *)b, 2, info);
if (little)
given = (b[0]) | (b[1] << 8) | (given << 16);
else
given = (b[1]) | (b[0] << 8) | (given << 16);
printer = print_insn_thumb32;
size = 4;
}
}
if (ifthen_address != pc)
find_ifthen_state(pc, info, little);
if (ifthen_state)
{
if ((ifthen_state & 0xf) == 0x8)
ifthen_next_state = 0;
else
ifthen_next_state = (ifthen_state & 0xe0)
| ((ifthen_state & 0xf) << 1);
}
}
...
$ objdump -d uftrace | grep "[0-9a-f][0-9a-f][0-9a-f][0-9a-f] [0-9a-f][0-9a-f][0-9a-f][0-9a-f]"
$ objdump -d uftrace | grep "[0-9a-f][0-9a-f][0-9a-f][0-9a-f] [0-9a-f][0-9a-f][0-9a-f][0-9a-f]" | sed 's/.*\([0-9a-f][0-9a-f][0-9a-f][0-9a-f] [0-9a-f][0-9a-f][0-9a-f][0-9a-f]\).*/\1/g'
The following may be a working function for our purpose.
/*
 * Report whether a Thumb halfword is the leading half of a four-byte
 * (Thumb32) instruction.
 *
 * Only bits [15:11] of `given` are examined; any higher or lower bits are
 * masked off.  The three patterns 0xe800, 0xf000 and 0xf800 in that field
 * mark a four-byte instruction.  Returns true for those patterns and false
 * for every other value.
 */
static bool is_thumb32(unsigned long given)
{
	switch (given & 0xf800) {
	case 0xe800:
	case 0xf000:
	case 0xf800:
		return true;
	default:
		return false;
	}
}
$ ./cstool -d thumb 0x00b5
0 00 b5 push {lr}
op_count: 1
operands[0].type: REG = lr
operands[0].access: READ
Registers read: sp lr
Registers modified: sp
Groups: thumb thumb1only
$ ./cstool -d thumb 0x48f23463
0 48 f2 34 63 movw r3, #0x8634
op_count: 2
operands[0].type: REG = r3
operands[0].access: WRITE
operands[1].type: IMM = 0x8634
Registers modified: r3
Groups: thumb2
141b2: e92d 47f0 stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, lr}
$ ./cstool -d thumb 0x2de9f047
0 2d e9 f0 47 push.w {r4, r5, r6, r7, r8, sb, sl, lr}
op_count: 8
operands[0].type: REG = r4
operands[0].access: READ | WRITE
operands[1].type: REG = r5
operands[1].access: READ | WRITE
operands[2].type: REG = r6
operands[2].access: READ | WRITE
operands[3].type: REG = r7
operands[3].access: READ | WRITE
operands[4].type: REG = r8
operands[4].access: READ | WRITE
operands[5].type: REG = sb
operands[5].access: READ | WRITE
operands[6].type: REG = sl
operands[6].access: READ | WRITE
operands[7].type: REG = lr
operands[7].access: READ | WRITE
Registers read: sp r4 r5 r6 r7 r8 sb sl lr
Registers modified: sp r4 r5 r6 r7 r8 sb sl lr
Groups: thumb2
The following example jumps back into the prologue, which makes the program crash. This case has to be detected in advance.
0003356c <uftrace_match_filter>:
3356c: e92d 41f0 stmdb sp!, {r4, r5, r6, r7, r8, lr}
33570: 461d mov r5, r3
33572: 6814 ldr r4, [r2, #0] # Here is the branch target
33574: b1e4 cbz r4, 335b0 <uftrace_match_filter+0x44>
33576: 6926 ldr r6, [r4, #16]
33578: 42b0 cmp r0, r6
...
335bc: 42b0 cmpeq r0, r6
335be: f104 0208 add.w r2, r4, #8
335c2: d3d6 bcc.n 33572 <uftrace_match_filter+0x6> # jumps back to prologue
335c4: 1d22 adds r2, r4, #4
335c6: e7d4 b.n 33572 <uftrace_match_filter+0x6> # jumps back to prologue
335c8: f641 40c0 movw r0, #7360 ; 0x1cc0
335cc: 68e1 ldr r1, [r4, #12]
335ce: f2c0 0005 movt r0, #5
335d2: f000 fead bl 34330 <__pr_dbg>
335d6: f8d8 300c ldr.w r3, [r8, #12]
335da: 2b02 cmp r3, #2
335dc: dde8 ble.n 335b0 <uftrace_match_filter+0x44>
335de: 4628 mov r0, r5
335e0: f7ff fda4 bl 3312c <print_trigger>
335e4: 4620 mov r0, r4
335e6: e8bd 81f0 ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
335ea: bf00 nop
Code snippet to detect whether the PC register is used:
cs_regs regs_read, regs_write;
uint8_t regs_read_count, regs_write_count;
/* check if the instruction uses PC register */
if (cs_regs_access(disasm->engine, insn, regs_read, &regs_read_count,
regs_write, &regs_write_count)) {
return -1;
}
for (i = 0; i < regs_read_count; i++) {
if (regs_read[i] == ARM_REG_PC) {
fprintf(stderr, "read PC reg\n");
return -1;
}
}
for (i = 0; i < regs_write_count; i++) {
if (regs_write[i] == ARM_REG_PC) {
fprintf(stderr, "write PC reg\n");
return -1;
}
}
For Thumb functions, we have to deal with a tricky problem.
The function prologue may mix 16-bit Thumb and 32-bit Thumb-2 instructions in no predictable pattern.
It is very difficult to patch in the trampoline code and save the original instructions, because we cannot know the boundaries of the original instructions without disassembling them correctly.