shellphish / driller

Driller: augmenting AFL with symbolic execution!
BSD 2-Clause "Simplified" License
880 stars 163 forks source link

Testing driller with a program that reads a file #48

Open insuyun opened 6 years ago

insuyun commented 6 years ago

Hi, all. I want to test the driller with a program that reads a file, like djpeg. First, I want to test a very simple example,

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>  /* declares read() and close() */

/*
 * Minimal file-reading target: opens the file named by argv[1], reads one
 * int from it, and prints "Good" when that value equals 0xdeadbeef.
 */
int main(int argc, char** argv) {
  int fd = open(argv[1], O_RDONLY);
  int input = 0;
  /* If nothing can be read, input keeps its initial value of 0. */
  read(fd, &input, sizeof(input));

  if (input == 0xdeadbeef)
    printf("Good");

  close(fd);
  return 0;
}

To do this, I modified the following line so that it passes fs and argv:

        s = p.factory.tracer_state(input_content=self.input, magic_content=r.magic, args=self.argv, fs=self._fs)                                                                                            

And run the driller as follows

    # Paths to the seed input file and the target binary.
    input_file = 'sample/input.bin'
    binary = 'sample/main'

    # Read the seed bytes; they are handed to Driller as its input below.
    with open(input_file, 'rb') as f:
        inputs = f.read()

    # Map the input path to a SimFile whose size matches the real file on disk.
    fs = {input_file: simuvex.storage.file.SimFile(input_file, "rb", size=os.path.getsize(input_file))}
    # NOTE(review): the argv= and fs= keywords presumably rely on a locally modified Driller — confirm it accepts them.
    d = driller.Driller(binary, inputs, argv=[binary, input_file], fs=fs)
    # Print every result yielded by the drilling generator.
    for drilled in d.drill_generator():
        print(drilled)

The input_file contains just "AAAA". Unfortunately, I couldn't get 0xdeadbeef: Driller generates a testcase, but it is just an empty string. I checked that open() returns a SimFile. Could you let me know where I should look? Thanks.

salls commented 6 years ago

Driller is heavily hardcoded to use stdin. You'll have to find in tracer where it preconstrains the input and make it use the file instead. And you'll have to look at where driller dumps inputs, because it's also just dumping stdin. It will definitely require some work to change from using stdin to using an input file.

The other option, if the program doesn't use stdin, is to hook/change the file reads so they read from stdin instead. Then you can use driller as is.

insuyun commented 6 years ago

Thanks. Let me try the former way, and if that fails, I will try the latter. I am worried that the programs under test could make file-related calls such as fseek or ftell.

ghost commented 6 years ago

I want to do exactly that, have you managed to do it? @jakkdu

insuyun commented 6 years ago

Hi @agarciagonzalez. I ran a program with /dev/stdin and passed the input through stdin. It seems to work and generates some testcases, but it couldn't generate many. I stopped there and stopped debugging it.

insuyun commented 6 years ago

@agarciagonzalez FYI, this is my code for reading file input with driller. It relies on what is arguably a bug in angr: angr treats /dev/stdin as a normal file, which lets us do file operations such as lseek on it. @salls Do you think this will be fine? I didn't change any core part of driller, only the qemu part (to take a file as input), and I use /dev/stdin on the angr side.

import os
import copy
import logging

import angr
import driller
import tracer

l = logging.getLogger("driller.driller_file")

# logging.getLogger("angr.state_plugins.posix").setLevel(logging.DEBUG)

# commit: c536408e9d70b8b0743db55efb9aa2e7e96c5601

# NOTE: The file path should be absolute path!
# argv should be ['./binary', '@/tmp/input_file.txt']

def patch_argv_qemu(argv):
    """
    Return a shallow copy of argv in which the "@"-marked argument has its
    leading "@" removed, leaving the plain file path for the concrete run.

    The input argv is not modified. At most one argument may start with "@";
    a second one trips an assertion.
    """
    patched_argv = copy.copy(argv)
    marker_seen = False
    for position, arg in enumerate(argv):
        if not arg.startswith("@"):
            continue
        # Drop only the marker character; the rest is the real path.
        patched_argv[position] = arg[1:]
        assert not marker_seen
        marker_seen = True
    return patched_argv

def patch_argv_angr(argv):
    """
    Return a shallow copy of argv in which the "@"-marked argument is
    replaced by "/dev/stdin".

    The input argv is not modified. At most one argument may start with "@";
    a second one trips an assertion.
    """
    patched_argv = copy.copy(argv)
    marker_count = 0
    for slot, arg in enumerate(argv):
        if arg.startswith("@"):
            patched_argv[slot] = "/dev/stdin"
            marker_count += 1
            # Only a single input-file marker is supported.
            assert marker_count == 1
    return patched_argv

class DrillerFile(driller.Driller):
    """
    Driller variant for targets that read their input from a file named on the command line.

    The file argument is marked in ``self.argv`` with a leading "@" (e.g. "@/tmp/input_file.txt").
    The QEMU run receives the real path, while the angr state receives "/dev/stdin" in its place.
    """

    def _parse_size(self):
        """
        Return the size in bytes of the "@"-marked input file named in ``self.argv``.

        Raises ValueError when no argument carries the "@" marker.
        """
        input_file = None
        for arg in self.argv:
            if arg.startswith("@"):
                # Only a single input-file marker is supported.
                assert(input_file is None)
                input_file = arg[1:]

        if input_file is None:
            raise ValueError("argv must contain an '@'-prefixed input file argument")

        return os.path.getsize(input_file)

    def _drill_input(self):
        """
        Symbolically step down a path with a tracer, trying to concretize inputs for unencountered
        state transitions.

        This is a generator: it yields each result of ``self._writeout`` that is not None, followed by
        whatever ``self._symbolic_explorer_stub`` yields for the same diverted state.
        """

        # initialize the tracer; QEMU gets the real file path
        r = tracer.qemu_runner.QEMURunner(self.binary, self.input, argv=patch_argv_qemu(self.argv))
        p = angr.Project(self.binary)
        for addr, proc in self._hooks.items():
            p.hook(addr, proc)
            l.debug("Hooking %#x -> %s...", addr, proc.display_name)

        if p.loader.main_object.os == 'cgc':
            p.simos.syscall_library.update(angr.SIM_LIBRARIES['cgcabi_tracer'])

        # angr sees the input as a "/dev/stdin" SimFile with the same size as the real file.
        files = {'/dev/stdin': angr.storage.file.SimFile("/dev/stdin", "r", size=self._parse_size())}
        s = p.factory.tracer_state(input_content=self.input, magic_content=r.magic, args=patch_argv_angr(self.argv), fs=files)

        simgr = p.factory.simgr(s, save_unsat=True, hierarchy=False, save_unconstrained=r.crash_mode)

        t = angr.exploration_techniques.Tracer(trace=r.trace)
        c = angr.exploration_techniques.CrashMonitor(trace=r.trace, crash_mode=r.crash_mode, crash_addr=r.crash_addr)
        self._core = angr.exploration_techniques.DrillerCore(trace=r.trace)

        simgr.use_technique(c)
        simgr.use_technique(t)
        simgr.use_technique(angr.exploration_techniques.Oppologist())
        simgr.use_technique(self._core)

        self._set_concretizations(simgr.one_active)

        l.debug("Drilling into %r.", self.input)
        l.debug("Input is %r.", self.input)

        while simgr.active and simgr.one_active.globals['bb_cnt'] < len(r.trace):
            simgr.step()

            # Check here to see if a crash has been found.
            if self.redis and self.redis.sismember(self.identifier + '-finished', True):
                return

            if 'diverted' not in simgr.stashes:
                continue

            while simgr.diverted:
                state = simgr.diverted.pop(0)
                l.debug("Found a diverted state, exploring to some extent.")
                w = self._writeout(state.history.bbl_addrs[-1], state)
                if w is not None:
                    yield w
                for i in self._symbolic_explorer_stub(state):
                    yield i

        # The loop also ends when the active stash is empty, so only touch one_active if a state is left.
        if simgr.active:
            l.debug("Trace %r, bb_cnt %r.", r.trace, simgr.one_active.globals['bb_cnt'])
xianghaohyman commented 6 years ago

hi @jakkdu
Did you try the former way suggested by the owner, salls? I also have another question: why change the qemu part (to get a file as input) but use /dev/stdin with angr? Could you explain the connection between "/dev/stdin" and the input file? Thanks, and looking forward to your reply.

insuyun commented 6 years ago
  1. I changed the qemu part so that qemu executes the program with the given input file. That way, qemu produces the correct concrete path that the program takes on that input.

  2. I made angr use /dev/stdin because the current driller is designed specifically for stdin. The trick (which I think is kind of a bug) is that angr allows file operations even when we open /dev/stdin. If you look at seek in posix.py, it only checks whether the file descriptor is 0, 1, or 2. But if you open /dev/stdin, the descriptor will be greater than that while still referring to stdin. So we can use file operations on stdin.

So what the above script does is connect angr's /dev/stdin and the concrete execution's input file, both to use driller's features (which are designed for stdin) and to get the correct path for the concrete execution.

Hope this can help you. Thanks.

xianghaohyman commented 6 years ago

@jakkdu I used the above script to test the above example, but it cannot generate any testcases, and it gives the following warnings:

/Desktop/driller/driller-master/tests$ python test_driller.py drilling_file
WARNING | 2018-04-19 04:22:18,639 | angr.analyses.disassembly_utils | Your verison of capstone does not support MIPS instruction groups.
DEBUG   | 2018-04-19 04:22:18,651 | driller.driller | [test] drilling started on Thu Apr 19 04:22:18 2018.
WARNING | 2018-04-19 04:22:18,651 | driller.driller | Debug directory is not set. Will not log fuzzing bitmap.
WARNING | 2018-04-19 04:22:20,264 | angr.simos.linux | Tracer has been heavily tested only for CGC. If you find it buggy for Linux binaries, we are sorry!
WARNING | 2018-04-19 04:22:21,597 | angr.exploration_techniques.tracer | Unable to correct discrepancy between qemu and angr.
DEBUG   | 2018-04-19 04:22:21,648 | driller.driller | [test] dumping input for 0x4006c6 -> 0x4006d0.
DEBUG   | 2018-04-19 04:22:21,648 | driller.driller | Generated: 41414141
DEBUG   | 2018-04-19 04:22:22,475 | driller.driller | [test] started symbolic exploration at Thu Apr 19 04:22:22 2018.
DEBUG   | 2018-04-19 04:22:44,076 | driller.driller | [test] stopped symbolic exploration at Thu Apr 19 04:22:44 2018.
WARNING | 2018-04-19 04:22:44,133 | angr.exploration_techniques.tracer | Unable to correct discrepancy between qemu and angr.

the test example

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>  /* declares read() and close() */

/*
 * Minimal file-reading target: opens the file named by argv[1], reads one
 * int from it, and prints "Good" when that value equals 0xdeadbeef.
 */
int main(int argc, char** argv) {
  int fd = open(argv[1], O_RDONLY);
  int input = 0;
  /* If nothing can be read, input keeps its initial value of 0. */
  read(fd, &input, sizeof(input));

  if (input == 0xdeadbeef)
    printf("Good");

  close(fd);
  return 0;
}

the input file contains "AAAA"

run the driller as follows

def test_drilling_file():
    """
    Drill the file-reading sample binary with DrillerFile, seeded from sample/input.bin.
    """
    import os

    binary = "sample/test"
    # The path behind the "@" marker has to be absolute, so resolve it up front.
    input_file = os.path.abspath("sample/input.bin")
    with open(input_file, 'rb') as f:
        inputs = f.read()

    # The "@" prefix marks which argv entry is the input file.
    d = driller.DrillerFile(binary, inputs, argv=[binary, "@" + input_file])

    d.drill()

Could you give me some advice? @jakkdu

insuyun commented 6 years ago

Sorry, I forgot to mention that the file path should be absolute path. Could you try with the absolute path? i.e., @sample/input.bin --> @/tmp/sample/input.bin

insuyun commented 6 years ago

@xianghaohyman I saw another comment from you, but seems removed. Is it working now?

xianghaohyman commented 6 years ago

@jakkdu it works , thanks for your advice

UESuperGate commented 1 year ago

Because of angr version updates, the scripts above unfortunately no longer work in the current environment. :( For example, the function tracer_state is no longer supported by angr, and neither is the class CrashMonitor.

I tried to replace the out-of-date APIs with currently available ones:

class DrillerFile(Driller):
    """
    Driller variant for targets that read their input from a file named on the command line.

    The file argument is marked in ``self.argv`` with a leading "@". The QEMU run receives the
    real path, while the non-CGC angr state receives "/dev/stdin" in its place.
    """

    def _parse_size(self):
        """
        Return the size in bytes of the "@"-marked input file named in ``self.argv``.

        Raises ValueError when no argument carries the "@" marker.
        """
        input_file = None
        for arg in self.argv:
            if arg.startswith("@"):
                # Only a single input-file marker is supported.
                assert(input_file is None)
                input_file = arg[1:]

        if input_file is None:
            raise ValueError("argv must contain an '@'-prefixed input file argument")

        return os.path.getsize(input_file)

    def _writeout(self, prev_addr, state):
        """
        Concretize the "/dev/stdin" file of ``state`` and record it as a generated input.

        Returns ``(key, generated)`` with ``key = (len(generated), prev_addr, state.addr)``, or None
        when that key is already in the catalogue.
        """
        # The input lives in the "/dev/stdin" SimFile here, not in state.posix.stdin.
        generated = state.fs.get("/dev/stdin").concretize()

        key = (len(generated), prev_addr, state.addr)

        # Checks here to see if the generation is worth writing to disk.
        # If we generate too many inputs which are not really different we'll seriously slow down AFL.
        if self._in_catalogue(*key):
            self._core.encounters.remove((prev_addr, state.addr))
            return None

        self._add_to_catalogue(*key)

        l.debug("[%s] dumping input for %#x -> %#x.", self.identifier, prev_addr, state.addr)

        self._generated.add((key, generated))

        if self.redis:
            # Publish it out in real-time so that inputs get there immediately.
            channel = self.identifier + '-generated'

            self.redis.publish(channel, pickle.dumps({'meta': key, 'data': generated, "tag": self.tag}))

        else:
            l.debug("Generated: %s", binascii.hexlify(generated))

        return (key, generated)

    def _drill_input(self):
        """
        Symbolically step down a path with a tracer, trying to concretize inputs for unencountered
        state transitions.

        This is a generator: it yields each result of ``self._writeout`` that is not None, followed by
        whatever ``self._symbolic_explorer_stub`` yields for the same diverted state.
        """

        # initialize the tracer; QEMU gets the real file path
        r = tracer.qemu_runner.QEMURunner(self.binary, self.input, argv=patch_argv_qemu(self.argv))
        p = angr.Project(self.binary)
        for addr, proc in self._hooks.items():
            p.hook(addr, proc)
            l.debug("Hooking %#x -> %s...", addr, proc.display_name)

        if p.loader.main_object.os == 'cgc':
            p.simos.syscall_library.update(angr.SIM_LIBRARIES['cgcabi_tracer'])

            s = p.factory.entry_state(stdin=angr.SimFileStream, flag_page=r.magic, mode='tracing')
        else:
            # Only the non-CGC path reads from a file, so the "/dev/stdin" SimFile is built here.
            files = {'/dev/stdin': angr.storage.file.SimFile("/dev/stdin", "r", size=self._parse_size())}
            s = p.factory.entry_state(args=patch_argv_angr(self.argv), fs=files)

        s.preconstrainer.preconstrain_file(self.input, s.posix.stdin, True)

        simgr = p.factory.simgr(s, save_unsat=True, hierarchy=False, save_unconstrained=r.crash_mode)

        t = angr.exploration_techniques.Tracer(trace=r.trace, crash_addr=r.crash_addr, copy_states=True, follow_unsat=True, mode="permissive")
        self._core = angr.exploration_techniques.DrillerCore(trace=r.trace, fuzz_bitmap=self.fuzz_bitmap)

        simgr.use_technique(t)
        simgr.use_technique(angr.exploration_techniques.Oppologist())
        simgr.use_technique(self._core)

        self._set_concretizations(simgr.one_active)

        l.debug("Drilling into %r.", self.input)
        l.debug("Input is %r.", self.input)

        while simgr.active and simgr.one_active.globals['trace_idx'] < len(r.trace) - 1:
            simgr.step()

            # Check here to see if a crash has been found.
            if self.redis and self.redis.sismember(self.identifier + '-finished', True):
                return

            if 'diverted' not in simgr.stashes:
                continue

            while simgr.diverted:
                state = simgr.diverted.pop(0)
                l.debug("Found a diverted state, exploring to some extent.")
                w = self._writeout(state.history.bbl_addrs[-1], state)
                if w is not None:
                    yield w
                for i in self._symbolic_explorer_stub(state):
                    yield i

It works fine for this simple test program:

/// echo -e "\x00\x00\x00\x00" > input
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>  /* declares read() and close() */

/*
 * Test target: reads a 4-byte int from the file named by argv[1] and prints
 * "xxx" when it is greater than 60000, otherwise "YYY". Prints "GG" and
 * exits when the file cannot be opened.
 */
int main(int argc, char **argv) {
    /* Initialised so the comparison below is well defined even if read() fails. */
    int x = 0;
    printf("ready to open %s\n", argv[1]);
    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        printf("GG\n");
        return 0;
    }
    read(fd, &x, 4);
    close(fd);
    if (x > 60000) printf("xxx\n");
    else printf("YYY\n");
    return 0;
}