eurecom-s3 / symqemu

SymQEMU: Compilation-based symbolic execution for binaries
http://www.s3.eurecom.fr/tools/symbolic_execution/symqemu.html
Other
317 stars 40 forks source link

atoi() is not solved #7

Open vanhauser-thc opened 3 years ago

vanhauser-thc commented 3 years ago

Example:

#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>

#define bail(msg, pos)                                         \
  while (1) {                                                  \
                                                               \
    fprintf(stderr, "%s at %u\n", (char *)msg, (uint32_t)pos); \
    return 0;                                                  \
                                                               \
  }

int LLVMFuzzerTestOneInput(uint8_t *buf, size_t len) {

  uint8_t   buff[100];

  if (len < 8) bail("too short", 0);
  if (len > sizeof(buff)) bail("too long", sizeof(buff));

  memcpy(buff, buf, len);
  buff[sizeof(buff) - 1] = 0;

  // string to int
  if (atoi((char *)buff) != 66766) bail("wrong string", 0);

  abort();

  return 0;

}

int main(int argc, char **argv) {

  unsigned char buf[64];
  ssize_t       len;
  int           fd = 0;
  if (argc > 1) fd = open(argv[1], O_RDONLY);

  if ((len = read(fd, buf, sizeof(buf))) <= 0) exit(0);

  LLVMFuzzerTestOneInput(buf, len);
  exit(0);

}
# gcc -o test -g test.c
# echo AAAAAAAAAAAAAAAAAAAAA|symqemu-x86_64 ./test
[STAT] SMT: { "solving_time": 0, "total_time": 65148 }
[STAT] SMT: { "solving_time": 4651 }
[INFO] New testcase: /tmp/output/000000
[...]
[INFO] New testcase: /tmp/output/000028-optimistic
[STAT] SMT: { "solving_time": 120114, "total_time": 560178 }
[STAT] SMT: { "solving_time": 121562 }
[STAT] SMT: { "solving_time": 121562, "total_time": 562343 }
[STAT] SMT: { "solving_time": 255014 }

but none of the 29 generated inputs contain the correct value:

# for i in /tmp/output/*; do hexdump -C $i;done|grep 00000000
00000000  be 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |.AAAAAAAAAAAAAAA|
00000000  00 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |.AAAAAAAAAAAAAAA|
00000000  2d 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |-AAAAAAAAAAAAAAA|
00000000  2b 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |+AAAAAAAAAAAAAAA|
00000000  30 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |0AAAAAAAAAAAAAAA|
00000000  00 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |.AAAAAAAAAAAAAAA|
00000000  30 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |0AAAAAAAAAAAAAAA|
00000000  be 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |.AAAAAAAAAAAAAAA|
00000000  be 41 41 41 41 41 41 41  41 41 41 41 41 41 41 41  |.AAAAAAAAAAAAAAA|
00000000  41 41 41 41 41 41 41 41  80 4e 80 00 40 00 00 00  |AAAAAAAA.N..@...|
00000000  41 41 41 41 41 41 41 41  76 4e 80 00 40 00 00 00  |AAAAAAAAvN..@...|
00000000  41 41 41 41 41 41 41 41  2e 20 00 00 40 00 00 00  |AAAAAAAA. ..@...|
00000000  41 41 41 41 41 41 41 41  01 c8 a4 9d 18 00 00 00  |AAAAAAAA........|
00000000  41 41 41 41 41 41 41 41  7f 2e 80 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  7f 2e 80 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  7f 2e 80 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  7f 2e 80 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  7f 2e 80 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  74 4e 80 00 40 00 00 00  |AAAAAAAAtN..@...|
00000000  41 41 41 41 41 41 41 41  74 4e 80 00 40 00 00 00  |AAAAAAAAtN..@...|
00000000  41 41 41 41 41 41 41 41  f9 1f 00 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  7f 2e 80 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  7f 2e 80 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  7f 2e 80 00 40 00 00 00  |AAAAAAAA....@...|
00000000  41 41 41 41 41 41 41 41  6f 4e 80 00 40 00 00 00  |AAAAAAAAoN..@...|
00000000  00 00 00 00 41 41 41 41  ee ff ff ff 41 41 41 41  |....AAAA....AAAA|
00000000  00 00 00 00 41 41 41 41  ee ff ff ff 41 41 41 41  |....AAAA....AAAA|
00000000  41 00 00 00 41 41 41 41  80 2e 80 00 40 00 00 00  |A...AAAA....@...|
00000000  00 00 00 00 41 41 41 41  ee ff ff ff 41 41 41 41  |....AAAA....AAAA|

why is this the case?

sebastianpoeplau commented 3 years ago

Probably atoi decodes in a loop; concolic execution then can't reason about the loop as a whole (i.e., as an abstract construct with many possible paths through it), but instead sees only the check of the loop condition in each iteration. It will generate a deviating input, but after that the outcome of the check becomes part of the path constraints and isn't reasoned about any further.