HexHive / retrowrite

RetroWrite -- Retrofitting compiler passes through binary rewriting
Other
664 stars 77 forks source link

Missing some instrumentations when instrumenting binary with AFL #28

Open Marsman1996 opened 3 years ago

Marsman1996 commented 3 years ago

RetroWrite generates labels in the format .L%x (i.e., .L followed by a hexadecimal address) in https://github.com/HexHive/retrowrite/blob/243db5811b96dfe07e6c697b3a2e2d61e0e7855b/librw/rw.py#L190 and https://github.com/HexHive/retrowrite/blob/9e2e633e9ab165681733f3255e648a62b22e6368/librw/container.py#L191

However, the afl-gcc compiler wrappers of AFL-family fuzzers such as AFL++ only instrument labels that start with .L followed by a decimal digit (i.e., the .L%d format):

https://github.com/AFLplusplus/AFLplusplus/blob/32a0d6ac31554a47dca591f8978982758fb87677/src/afl-as.c#L464-L466

        if ((isdigit(line[2]) ||
             (clang_mode && !strncmp(line + 1, "LBB", 3))) &&
            R(100) < (long)inst_ratio) {

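Taking nm from binutils as an example, the .L9ffea basic block is instrumented because the character after .L is a digit, while .La0047 and .La0058 are not instrumented because their hexadecimal addresses begin with the letter a. After the fix, the number of instrumentation points increases from 39511 to 47795.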

.L9ffea:
.LC9ffea:

/* --- AFL TRAMPOLINE (64-BIT) --- */

.align 4

leaq -(128+24)(%rsp), %rsp
movq %rdx,  0(%rsp)
movq %rcx,  8(%rsp)
movq %rax, 16(%rsp)
movq $0x00006e12, %rcx
call __afl_maybe_log
movq 16(%rsp), %rax
movq  8(%rsp), %rcx
movq  0(%rsp), %rdx
leaq (128+24)(%rsp), %rsp

/* --- END --- */

    movq -0x48(%rbp), %rax
.LC9ffee:
    movq -0x58(%rbp), %rcx
.LC9fff2:
    movq %rax, 8(%rcx)
.LC9fff6:
    movq -0x58(%rbp), %rax
.LC9fffa:
    movl $0xffffffff, 0x60(%rax)
.LCa0001:
    movq -0x58(%rbp), %rax
.LCa0005:
    movl $1, 0x64(%rax)
.LCa000c:
    movl $1, -0x64(%rbp)
.LCa0013:
    movq -0x20(%rbp), %rax
.LCa0017:
    movq (%rax), %rax
.LCa001a:
    cmpq $0, 0x100(%rax)
.LCa0022:
    je .La0047

/* --- AFL TRAMPOLINE (64-BIT) --- */

.align 4

leaq -(128+24)(%rsp), %rsp
movq %rdx,  0(%rsp)
movq %rcx,  8(%rsp)
movq %rax, 16(%rsp)
movq $0x0000740b, %rcx
call __afl_maybe_log
movq 16(%rsp), %rax
movq  8(%rsp), %rcx
movq  0(%rsp), %rdx
leaq (128+24)(%rsp), %rsp

/* --- END --- */

.LCa0028:
    movq -0x20(%rbp), %rax
.LCa002c:
    movq (%rax), %rax
.LCa002f:
    movq 0x100(%rax), %rax
.LCa0036:
    cmpl $0, 0x10(%rax)
.LCa003a:
    jne .La0047

/* --- AFL TRAMPOLINE (64-BIT) --- */

.align 4

leaq -(128+24)(%rsp), %rsp
movq %rdx,  0(%rsp)
movq %rcx,  8(%rsp)
movq %rax, 16(%rsp)
movq $0x000050f3, %rcx
call __afl_maybe_log
movq 16(%rsp), %rax
movq  8(%rsp), %rcx
movq  0(%rsp), %rdx
leaq (128+24)(%rsp), %rsp

/* --- END --- */

.LCa0040:
    movl $0, -0x64(%rbp)
.La0047:
.LCa0047:
    movq -0x20(%rbp), %rax
.LCa004b:
    movq (%rax), %rax
.LCa004e:
    addq $0x100, %rax
.LCa0054:
    movq %rax, -0x60(%rbp)
.La0058:
.LCa0058:
    movq -0x60(%rbp), %rax
.LCa005c:
    cmpq $0, (%rax)
.LCa0060:
    je .La007f

I think either RetroWrite could output labels in the format .L%d (see https://github.com/HexHive/retrowrite/pull/27), or the check in afl-as.c could be modified to also accept the hexadecimal letters a-f:

        if (((isdigit(line[2]) || (line[2] >= 'a' && line[2] <= 'f')) ||
            (clang_mode && !strncmp(line + 1, "LBB", 3))) &&
            R(100) < (long)inst_ratio) {

The assembly code files are attached here.