Qiling + unicornafl seems like can't find an easy integer overflow

dzonerzy commented 2 years ago

Basically, I created a vulnerable binary and linked it against uClibc-ng (arm-eabihf). The source code is below:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read the whole file `filename` into a freshly malloc'ed buffer.
 *
 * On success the buffer is returned (the caller owns it and must free() it)
 * and the file length is stored in *filesize. The buffer is one byte longer
 * than the file and is NUL-terminated.
 *
 * Returns NULL when the file cannot be opened, its size cannot be determined,
 * the allocation fails, or fewer bytes than expected could be read.
 * *filesize is only written once the size has been determined successfully.
 */
char* c_readfile(char *filename, size_t *filesize)
{
    char *buffer = NULL;
    size_t read_size;
    long length;
    FILE *handler = fopen(filename, "rb");

    if (!handler)
    {
        return NULL;
    }

    /* Determine the file size; ftell() reports failure as a negative value. */
    if (fseek(handler, 0, SEEK_END) != 0 || (length = ftell(handler)) < 0)
    {
        fclose(handler);
        return NULL;
    }
    rewind(handler);
    *filesize = (size_t) length;

    /* One extra byte for the terminating NUL. */
    buffer = (char*) malloc(*filesize + 1);
    if (buffer)
    {
        read_size = fread(buffer, sizeof(char), *filesize, handler);
        if (read_size != *filesize)
        {
            /* Short read: treat as failure rather than return partial data. */
            free(buffer);
            buffer = NULL;
        }
        else
        {
            buffer[*filesize] = '\0';
        }
    }
    fclose(handler);
    return buffer;
}

/*
 * Record tags recognised by parse_data(): every record in the input starts
 * with one of the three magic values below.
 * NOTE(review): the magic values do not fit in a signed 32-bit int and the
 * enum also contains -1, so the width of stype_t depends on the compiler --
 * confirm for the target toolchain.
 */
typedef enum {
    STYPE_ANGELO = 0xdeadbeef,
    STYPE_JACK = 0xcafebabe,
    STYPE_DZONERZY = 0xc00fc00f,
    STYPE_INVALID = -1 /* initial Kind before any record has been parsed */
} stype_t;

/* Short aliases for plain and unsigned chars; not referenced by the code shown here. */
typedef char byte;
typedef unsigned char ubyte;

/* Result of parse_data(): describes the most recently parsed record. */
typedef struct {
    stype_t Kind;   /* record tag; STYPE_INVALID until a record has been parsed */
    short Length;   /* payload length taken from the input; signed, so it may be negative */
    char * Buffer;  /* heap copy of the payload (parse_data allocates 32 bytes) or NULL */
} content_t, *pcontent_t;

/*
 * Parse a sequence of records from `buffer` (`buf_len` bytes) and return a
 * heap-allocated content_t describing the last record seen.
 *
 * Each record is read as an `unsigned long long` kind tag, followed by a
 * `short` length, followed by `length` payload bytes. An unknown kind, or a
 * length rejected by the check below, terminates the process with exit(0).
 *
 * This function is the deliberately vulnerable fuzzing target. The
 * NOTE(review) comments mark the unsafe spots; they are left in place on
 * purpose.
 */
pcontent_t parse_data(unsigned char * buffer, size_t buf_len) {
    /* NOTE(review): the malloc() result is used without a NULL check. */
    pcontent_t data = (pcontent_t) malloc(sizeof(content_t));
    data->Kind = STYPE_INVALID;
    data->Length = 0;
    data->Buffer = NULL;
    size_t counter = 0; /* byte offset of the next record in buffer */
    while(counter < buf_len) {
        /*
         * NOTE(review): only `counter < buf_len` is checked, so the tag and
         * length reads below can run past the end of the input, and the cast
         * performs a possibly unaligned access.
         */
        unsigned long long kind = *(unsigned long long *)(buffer + counter);
        switch(kind) {
            case STYPE_ANGELO:
                printf("Got kind ANGELO!\n");
                data->Kind = kind;
                break;
            case STYPE_JACK:
                printf("Got kind JACK!\n");
                data->Kind = kind;
                break;
            case STYPE_DZONERZY:
                printf("Got kind DZONERZY!\n");
                                data->Kind = kind;
                break;
            default:
                /* Any other tag ends the run with a normal (non-crashing) exit. */
                printf("Invalid kind 0x%llx\n", kind);
                exit(0);
        }
        counter += sizeof(kind);
        /* The length field follows the tag directly. */
        data->Length = *(short *)(buffer + counter);
        counter += sizeof(data->Length);
        /* NOTE(review): the format has one conversion but two arguments are passed. */
        printf("Got size: 0x%x\n", data->Length, (short)(data->Length));
        /*
         * The bound is tested on Length + 1 narrowed back to short. A Length
         * of 0x7fff is expected to wrap to a negative value on the usual
         * two's-complement targets, and every negative Length also yields a
         * value <= 32, so both cases pass the check.
         */
        if((short)(data->Length + 1) > 32) { // integer overflow happens here: the check uses the narrowed Length + 1
            printf("invalid length > 32\n");
            exit(0);
        }else{
            /* NOTE(review): a new 32-byte buffer per record; the previous one is never freed. */
            data->Buffer = malloc(32);
            /*
             * Length is converted to size_t here: values that slipped past
             * the check (above 32 or negative) write outside the 32-byte
             * allocation.
             */
            memset(data->Buffer, 0, data->Length);
            memcpy(data->Buffer, (buffer + counter), data->Length);
        }
        /* NOTE(review): a negative Length moves the offset backwards / wraps it. */
        counter += data->Length;
        /* NOTE(review): %d is used with a size_t argument. */
        printf("counter = %d\n",counter);
    }
    return data;
}

/*
 * Entry point: read the file named by argv[1], hand its contents to
 * parse_data() and print the fields of the returned record.
 *
 * Returns 0 on success and -1 when the argument count is wrong; exits with
 * status -1 when the input file cannot be read.
 */
int main(int argc, char ** argv) {
    if(argc != 2) {
        printf("Usage: %s <input>\n", argv[0]);
        return -1;
    }

    char * inputfile = argv[1];
    printf("Using input file: %s\n", inputfile);

    size_t filesize = 0;
    /* c_readfile() returns char *, the parser takes unsigned bytes: convert explicitly. */
    unsigned char *buffer = (unsigned char *) c_readfile(inputfile, &filesize);
    if(!buffer) {
        printf("Invalid file specified!\n");
        exit(-1);
    }

    /* %zu is the conversion for size_t; %lu only works where size_t happens to be unsigned long. */
    printf("Got file %zu bytes, start parsing\n", filesize);
    pcontent_t ret = parse_data(buffer, filesize);

    /* Casts make the arguments match %llx / %x / %p whatever width the enum has. */
    printf("Kind: 0x%llx Length: 0x%08x Buffer: %p\n",
           (unsigned long long) ret->Kind,
           (unsigned int) ret->Length,
           (void *) ret->Buffer);

    return 0;
}

Then I created a simple Qiling script that first takes a snapshot and then uses that snapshot to fuzz the parsing function:

import os
import signal
import sys
import qiling.core
from qiling import *
from qiling.const import QL_VERBOSE
import unicornafl

def snap(ql: qiling.core.Qiling):
    """Dump the parser's input buffer, save a snapshot to ``snap.bin`` and exit.

    The buffer pointer is taken from r0 and its length from r1. The bytes are
    printed only as a sanity check that r0 points at the expected buffer.
    """
    buf_ptr, buf_len = ql.reg.read("r0"), ql.reg.read("r1")
    print(ql.mem.read(buf_ptr, buf_len))
    ql.save(snapshot="snap.bin")
    exit(0)

def start_afl(ql: qiling.core.Qiling, input_file):
    """Start the unicornafl fuzzing loop from the current emulation state.

    ``input_file`` is the path AFL writes each test case to. If ``afl_fuzz``
    returns a falsy value, a notice is printed and the process exits with
    status 0. A ``UcAflError`` is re-raised unless it compares equal to
    ``UC_AFL_RET_CALLED_TWICE``.
    """
    def place_input_callback(uc: unicornafl.Uc, fuzzed: bytes, persistent_round: int, data):
        # Map a fresh region sized for this test case and copy the input in.
        length = len(fuzzed)
        target = ql.mem.map_anywhere(length)
        ql.mem.write(target, fuzzed)
        # r0 holds the pointer to the fuzzed input, r1 the buffer length.
        ql.reg.write("r0", target)
        ql.reg.write("r1", length)
        return True

    try:
        fuzz_result = ql.uc.afl_fuzz(
            input_file=input_file,
            place_input_callback=place_input_callback,
            exits=[ql.os.exit_point],
        )
        if not fuzz_result:
            print("Ran once without AFL attached.")
            exit(0)
    except unicornafl.UcAflError as ex:
        if ex != unicornafl.UC_AFL_RET_CALLED_TWICE:
            raise

def my_syscall_write(ql: Qiling, write_fd, write_buf, write_count, *args, **kw):
    """Replacement ``write`` syscall that swallows the output.

    The guest buffer is still read from emulated memory, but nothing is
    printed. Returns ``write_count``, i.e. it reports a complete write.
    """
    payload = ql.mem.read(write_buf, write_count)
    # Printing is disabled on purpose; to see guest output, emit
    # payload.decode() with end="" here.
    return write_count

def mem_read_write(ql: Qiling, access, addr, size, value):
    """Memory-write hook: abort the process if the written range is unmapped.

    Qiling calls memory hooks positionally as
    ``(ql, access, address, size, value)``. The parameters are named to match
    that order so that the real access size, not the access-type code, is
    passed to ``is_mapped``. ``access`` and ``value`` are unused.

    NOTE(review): a write that stays inside mapped memory (for example a heap
    overflow into a neighbouring chunk) still passes this check.
    """
    if not ql.mem.is_mapped(addr, size):
        os.abort()

def abort(ql, int_code):
    """Send SIGSEGV to the current process.

    Both parameters are ignored. Presumably the signal is what lets the
    fuzzer record the run as a crash -- confirm against the AFL setup.
    """
    own_pid = os.getpid()
    os.kill(own_pid, signal.SIGSEGV)

def emulate(path, rootfs, input_file):
    """Restore the saved snapshot and fuzz the parsing function under AFL.

    ``path`` is the argv list for the target, ``rootfs`` the emulated root
    filesystem and ``input_file`` the file AFL writes each test case to.
    Any emulation error is turned into a SIGSEGV via ``abort``.
    """
    ql = Qiling(path, rootfs, verbose=QL_VERBOSE.DEBUG)
    # This is done only the first time to save snapshot
    # ql.hook_address(snap, address=0x00010b30)
    # ql.run()

    # Restore the snapshot taken right before the call to the parse function.
    ql.restore(snapshot="./snap.bin")
    # Hook right before the parsing function; the hook places the fuzzed
    # input in memory and starts the AFL loop.
    ql.hook_address(start_afl, 0x00010b30, user_data=input_file)
    ql.set_syscall(0x4, my_syscall_write)  # disable stdout/stderr printing
    ql.hook_mem_write(mem_read_write)  # check for writes to unallocated memory

    try:
        ql.emu_start(begin=0x00010b30, end=0x0010b34)
    except Exception:
        # Only real errors count as crashes. A bare ``except:`` would also
        # catch SystemExit and KeyboardInterrupt and report them as SIGSEGV.
        abort(ql, 0)

if __name__ == "__main__":
    # Script entry point: patch in unicornafl, then fuzz with the file given on the command line.
    unicornafl.monkeypatch()
    if len(sys.argv) != 2:
        print(f"Usage {sys.argv[0]} <input>")
    else:
        emulate(path=["./test", ""], rootfs="./rootfs", input_file=sys.argv[1])

Anyway, it seems that after 5 completed cycles it still can't find the vulnerable path, while forcing the crashing input inside place_input_callback works fine and makes AFL register the crash.

I run afl with

AFL_DEBUG=1 afl-fuzz -D -U -i input/ -o output/ -- python3 main.py @@

test-arm.tar.gz

┌─ process timing ────────────────────────────────────┬─ overall results ────┐
│        run time : 0 days, 0 hrs, 21 min, 3 sec      │  cycles done : 25    │ <- 25 cycles completed :(
│   last new find : 0 days, 0 hrs, 20 min, 53 sec     │ corpus count : 16    │
│last saved crash : none seen yet                     │saved crashes : 0     │
│ last saved hang : none seen yet                     │  saved hangs : 0     │
├─ cycle progress ─────────────────────┬─ map coverage┴──────────────────────┤
│  now processing : 4.311 (25.0%)      │    map density : 0.63% / 0.68%      │
│  runs timed out : 0 (0.00%)          │ count coverage : 1.22 bits/tuple    │
├─ stage progress ─────────────────────┼─ findings in depth ─────────────────┤
│  now trying : havoc                  │ favored items : 8 (50.00%)          │
│ stage execs : 58/145 (40.00%)        │  new edges on : 8 (50.00%)          │
│ total execs : 133k                   │ total crashes : 0 (0 saved)         │
│  exec speed : 102.7/sec              │  total tmouts : 0 (0 saved)         │
├─ fuzzing strategy yields ────────────┴─────────────┬─ item geometry ───────┤
│   bit flips : 1/704, 0/689, 0/659                  │    levels : 2         │
│  byte flips : 0/88, 0/73, 0/43                     │   pending : 1         │
│ arithmetics : 0/4927, 0/977, 0/19                  │  pend fav : 0         │
│  known ints : 8/398, 6/1849, 0/1888                │ own finds : 15        │
│  dictionary : 0/0, 0/0, 0/0                        │  imported : 0         │
│havoc/splice : 0/73.8k, 0/47.3k                     │ stability : 100.00%   │
│py/custom/rq : unused, unused, unused, unused       ├───────────────────────┘
│    trim/eff : 92.26%/27, 0.00%                     │          [cpu000:100%]
└────────────────────────────────────────────────────┘

domenukk commented 2 years ago

The fuzzer usually won't be able to brute-force 32-bit values. You can try handing the values to AFL as tokens/a dictionary, and it should work. Alternatively, switch to QEMU mode and use CMPLOG, or wait for upstream Unicorn to fix CMP hooks, cc @wtdcode

wtdcode commented 2 years ago

Could you retry now?

dzonerzy commented 2 years ago

Sure I'll let you know once tested.

dzonerzy commented 2 years ago

I tried again after updating, and I'm having a different issue now. Here's the code:

import os
import sys
from qiling import Qiling
from qiling.const import QL_VERBOSE
from qiling.extensions.afl import ql_afl_fuzz

def start_afl(ql: Qiling, user_data):
    """Start fuzzing through Qiling's AFL extension from the current state.

    ``user_data`` is the path of the file AFL writes each test case to. Any
    error raised while fuzzing aborts the process.
    """
    def place_input_callback(_ql: Qiling, fuzzed: bytes, persistent_round: int):
        size = len(fuzzed)
        # Observed problem: r0 should point to the buffer here, but it reads
        # as 0 -- it seems the uc context is lost inside this callback.
        mem = _ql.reg.read("r0")
        _ql.reg.write("r1", size)
        _ql.mem.write(mem, fuzzed)
        return True

    try:
        ql_afl_fuzz(ql, input_file=user_data, place_input_callback=place_input_callback, exits=[ql.os.exit_point])
    except Exception:
        # Abort on real errors only; a bare ``except:`` would also turn
        # SystemExit and KeyboardInterrupt into an abort.
        os.abort()

def emulate(binary, rootfs, fuzzed_binary):
    """Restore ``./snap.bin`` and run the target with the AFL hook installed.

    ``binary`` is the argv list for the target, ``rootfs`` the emulated root
    filesystem and ``fuzzed_binary`` the input file passed on to the hook.
    """
    hook_addr, stop_addr = 0x00010b30, 0x0010b34
    ql = Qiling(binary, rootfs, verbose=QL_VERBOSE.DEBUG)
    ql.restore(snapshot="./snap.bin")
    # start_afl fires when execution reaches hook_addr.
    ql.hook_address(start_afl, hook_addr, user_data=fuzzed_binary)
    ql.emu_start(begin=hook_addr, end=stop_addr)

if __name__ == "__main__":
    # Script entry point: the first command-line argument is the fuzzer input file.
    emulate(binary=["./test", "./pier"], rootfs="./rootfs", fuzzed_binary=sys.argv[1])

Inside the place_input_callback callback the Qiling context seems wrong: the r0 register points to the buffer inside the start_afl callback, but inside place_input_callback it is zero. That wasn't happening with the previous version. cc @wtdcode

dzonerzy commented 2 years ago

Any updates on this?

AFLplusplus / unicornafl

Qiling + unicornafl seems like can't find an easy integer overflow #4