munificent / craftinginterpreters

Repository for the book "Crafting Interpreters"
http://www.craftinginterpreters.com/
Other
8.5k stars 1.01k forks source link

Issue in chapter 20.5 String Interning: interpreter crashes when provided strings #1057

Closed SimonCarlson closed 2 years ago

SimonCarlson commented 2 years ago

Hi. I'm at chapter 20.5 in the book and my interpreter crashes when provided a string of length < 2 or > 3. For instance:

> "aa"
== code ==
0000    1 OP_CONSTANT         0 'aa'
0002    2 OP_RETURN

0000    1 OP_CONSTANT         0 'aa'
          [ aa ]
0002    2 OP_RETURN
aa
> "aaa"
== code ==
0000    1 OP_CONSTANT         0 'aaa'
0002    2 OP_RETURN

0000    1 OP_CONSTANT         0 'aaa'
          [ aaa ]
0002    2 OP_RETURN
aaa

works but

> "a"
a.out: malloc.c:2379: sysmalloc: Assertion `(old_top == initial_top (av) && old_size == 0) || ((unsigned long) (old_size) >= MINSIZE && prev_inuse (old_top) && ((unsigned long) old_end & (pagesize - 1)) == 0)' failed.
Aborted

does not. These problems did not occur in the previous part, 20.4. I notice this starts to happen when I include the line

tableSet(&vm.strings, string, NIL_VAL);

in the allocateString function in object.c. I tried using valgrind and get these errors:

==6732== Invalid write of size 8
==6732==    at 0x10B5AD: tableSet (table.c:85)
==6732==    by 0x10A527: allocateString (object.c:26)
==6732==    by 0x10A647: copyString (object.c:49)
==6732==    by 0x109BA8: string (compiler.c:174)
==6732==    by 0x109C63: parsePrecedence (compiler.c:241)
==6732==    by 0x109CDF: expression (compiler.c:255)
==6732==    by 0x109D2F: compile (compiler.c:266)
==6732==    by 0x10C651: interpret (vm.c:161)
==6732==    by 0x10A111: repl (main.c:20)
==6732==    by 0x10A2E2: main (main.c:66)
==6732==  Address 0x4a4ea50 is 16 bytes after a block of size 192 alloc'd
==6732==    at 0x483B723: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==6732==    by 0x483E017: realloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==6732==    by 0x10A37F: reallocate (memory.c:12)
==6732==    by 0x10B363: adjustCapacity (table.c:53)
==6732==    by 0x10B53F: tableSet (table.c:78)
==6732==    by 0x10A527: allocateString (object.c:26)
==6732==    by 0x10A647: copyString (object.c:49)
==6732==    by 0x109BA8: string (compiler.c:174)
==6732==    by 0x109C63: parsePrecedence (compiler.c:241)
==6732==    by 0x109CDF: expression (compiler.c:255)
==6732==    by 0x109D2F: compile (compiler.c:266)
==6732==    by 0x10C651: interpret (vm.c:161)
==6732==

where I feel like the chain tableSet -> adjustCapacity -> reallocate -> realloc is the culprit, but I've not been able to confirm this using gdb as stepping through the code is finicky (for instance stepping over the line void* result = realloc(pointer, newSize); in reallocate in memory.c sometimes causes a crash, but using next instead never causes a crash).

I've compared my source code to the one in the repository and cannot find any errors., find my current versions of table.c and memory.c below. I'm out of ideas on what to do here, any help is appreciated.

table.c:

#include <string.h>

#include "memory.h"
#include "object.h"
#include "table.h"
#include "value.h"

#define TABLE_MAX_LOAD 0.75

void initTable(Table* table) {
  table->count = 0;
  table->capacity = 0;
  table->entries = NULL;
}

void freeTable(Table* table) {
  FREE_ARRAY(Entry, table->entries, table->capacity);
  initTable(table);
}

static Entry* findEntry(Entry* entries, int capacity,
                        ObjString* key) {
  uint32_t index = key->hash & capacity;
  Entry* tombstone = NULL;
  for (;;) {
    Entry* entry = &entries[index];
    if (entry->key == NULL) {
      if (IS_NIL(entry->value)) {
        return tombstone != NULL ? tombstone : entry;
      } else {
        if (tombstone == NULL) tombstone = entry;
      }
    } else if (entry->key == key) {
      return entry;
    }

    index = (index + 1) % capacity;
  }
}

bool tableGet(Table* table, ObjString* key, Value* value) {
  if (table->count == 0) return false;

  Entry* entry = findEntry(table->entries, table->capacity, key);
  if (entry->key == NULL) return false;

  *value = entry->value;
  return true;
}

static void adjustCapacity(Table* table, int capacity) {
  Entry* entries = ALLOCATE(Entry, capacity);
  for (int i = 0; i < capacity; i++) {
    entries[i].key = NULL;
    entries[i].value = NIL_VAL;
  }

  table->count = 0;
  for (int i = 0; i < table->capacity; i++) {
    Entry* entry = &table->entries[i];
    if (entry->key == NULL) continue;

    Entry* dest = findEntry(entries, capacity, entry->key);
    dest->key = entry->key;
    dest->value = entry->value;
    table->count++;
  }

  FREE_ARRAY(Entry, table->entries, table->capacity);
  table->entries = entries;
  table->capacity = capacity;
}

bool tableSet(Table* table, ObjString* key, Value value) {
  if (table->count + 1 > table->capacity * TABLE_MAX_LOAD) {
    int capacity = GROW_CAPACITY(table->capacity);
    adjustCapacity(table, capacity);
  }
  Entry* entry = findEntry(table->entries, table->capacity, key);
  bool isNewKey = entry->key == NULL;
  if (isNewKey && IS_NIL(entry->value)) table->count++;

  entry->key = key;
  entry->value = value;
  return isNewKey;
}

bool tableDelete(Table* table, ObjString* key) {
  if (table->count == 0) return false;

  Entry* entry = findEntry(table->entries, table->capacity, key);
  if (entry->key == NULL) return false;

  entry->key = NULL;
  entry->value = BOOL_VAL(true);
  return true;
}

void TableAddAll(Table* from, Table* to) {
  for (int i = 0; i < from->capacity; i++) {
    Entry* entry = &from->entries[i];
    if (entry->key != NULL) {
      tableSet(to, entry->key, entry->value);
    }
  }
}

memory.c:

#include <stdlib.h>

#include "memory.h"
#include "vm.h"

void* reallocate(void* pointer, size_t oldSize, size_t newSize) {
  if (newSize == 0) {
    free(pointer);
    return NULL;
  }

  void* result = realloc(pointer, newSize);
  if (result == NULL) exit(1);
  return result;
}

static void freeObject(Obj* object) {
  switch(object->type) {
    case OBJ_STRING: {
      ObjString* string = (ObjString*)object;
      FREE_ARRAY(char, string->chars, string->length + 1);
      FREE(ObjString, object);
      break;
    }
  }
}

void freeObjects() {
  Obj* object = vm.objects;
  while (object != NULL) {
    Obj* next = object->next;
    freeObject(object);
    object = next;
  }
}
SimonCarlson commented 2 years ago

I added a prints to reallocate:

...
printf("pointer %p newSize %ld\n", pointer, newSize);
void *result = realloc(pointer, newSize);
prtinf("realloc successfully called\n");
...

and get the following output.

Successfully allocating string "aa":

> "aa"
pointer (nil) newSize 3
realloc successfully called
pointer (nil) newSize 40
realloc successfully called
pointer (nil) newSize 192
realloc successfully called
pointer (nil) newSize 128
realloc successfully called
pointer (nil) newSize 8
realloc successfully called
pointer (nil) newSize 32
realloc successfully called
== code ==
0000    1 OP_CONSTANT         0 'aa'
0002    2 OP_RETURN

0000    1 OP_CONSTANT         0 'aa'
          [ aa ]
0002    2 OP_RETURN
aa

Crashing on string "a":

> "a"
pointer (nil) newSize 2
realloc successfully called
pointer (nil) newSize 40
realloc successfully called
pointer (nil) newSize 192
realloc successfully called
pointer (nil) newSize 128
a.out: malloc.c:2379: sysmalloc: Assertion `(old_top == initial_top (av) && old_size == 0) || ((unsigned long) (old_size) >= MINSIZE && prev_inuse (old_top) && ((unsigned long) old_end & (pagesize - 1)) == 0)' failed.
Aborted

The function is called with the same arguments both times, except the very first invocation where newSize is 3 and 2 respectively, yet only fails in one case

SimonCarlson commented 2 years ago

There was a bug in table.c: I was calculating the index as uint32_t index = key->hash & capacity; instead of uint32_t index = key->hash % capacity;.