spotify / sparkey

Simple constant key/value storage library, for read-heavy systems with infrequent large bulk inserts.
Apache License 2.0
1.18k stars 81 forks source link

sparkey_logiter_reset doesn't reset #18

Closed stephenmathieson closed 10 years ago

stephenmathieson commented 10 years ago

I may have misinterpreted the documentation, but it doesn't look like sparkey_logiter_reset actually resets the iterator.

Test case:


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sparkey/sparkey.h>
#include <assert.h>

/*
 * Check the return code of a sparkey call: when `rc` is not SPARKEY_SUCCESS,
 * print the sparkey error string and the current source line to stderr and
 * exit with status 1.
 *
 * NOTE(review): `rc` is expanded twice (once in the comparison, once as the
 * argument of sparkey_errstring), so a call expression passed as `rc` is
 * executed a second time on the failure path.
 * NOTE(review): `({ ... })` is a GNU statement expression, and the expansion
 * already ends in `;`.
 */
#define SPARKEY_ASSERT(rc) ({  \
  if (SPARKEY_SUCCESS != rc) { \
    fprintf(                   \
        stderr                 \
      , "error: %s (%d)\n"     \
      , sparkey_errstring(rc)  \
      , __LINE__               \
    );                         \
    exit(1);                   \
  }                            \
});

// Reproduction program: writes two key/value pairs to "test.spl", reads the
// first key through a log iterator, calls sparkey_logiter_reset(), calls
// sparkey_logiter_next() again and asserts that the key read is still "key1".
int
main(void) {
  sparkey_logwriter *writer = NULL;
  sparkey_logreader *reader = NULL;
  sparkey_logiter *iterator = NULL;
  const char *key1 = "key1";
  const char *value1 = "value1";
  // NOTE(review): strlen() excludes the NUL terminator, so the keys and
  // values written below are not stored as C strings.
  size_t key1size = strlen(key1);
  size_t value1size = strlen(value1);
  const char *key2 = "key2";
  const char *value2 = "value2";
  size_t key2size = strlen(key2);
  size_t value2size = strlen(value2);
  uint64_t wanted;
  uint64_t actual;
  uint8_t *buffer = NULL;

  // create a log
  SPARKEY_ASSERT(sparkey_logwriter_create(
      &writer
    , "test.spl"
    , SPARKEY_COMPRESSION_NONE
    , 0
  ));

  // write some stuff
  SPARKEY_ASSERT(sparkey_logwriter_put(
      writer
    , key1size
    , (uint8_t *) key1
    , value1size
    , (uint8_t *) value1
  ));
  SPARKEY_ASSERT(sparkey_logwriter_put(
      writer
    , key2size
    , (uint8_t *) key2
    , value2size
    , (uint8_t *) value2
  ));

  SPARKEY_ASSERT(sparkey_logwriter_close(&writer));

  SPARKEY_ASSERT(sparkey_logreader_open(&reader, "test.spl"));
  SPARKEY_ASSERT(sparkey_logiter_create(&iterator, reader));

  // get first key
  SPARKEY_ASSERT(sparkey_logiter_next(iterator, reader));
  wanted = sparkey_logiter_keylen(iterator);
  // NOTE(review): the allocation happens inside assert(), so it is compiled
  // out together with the assertion when NDEBUG is defined.
  assert((buffer = malloc(wanted)));
  SPARKEY_ASSERT(sparkey_logiter_fill_key(
      iterator
    , reader
    , wanted
    , buffer
    , &actual
  ));

  // NOTE(review): buffer holds exactly `wanted` bytes and nothing here writes
  // a terminator, so "%s" and strcmp() may read past the allocation.
  printf("buffer: %s\n", buffer);
  assert(0 == strcmp("key1", (char *) buffer));
  free(buffer);

  // reset iterator
  SPARKEY_ASSERT(sparkey_logiter_reset(iterator, reader));

  // get key again
  // NOTE(review): the output reported for this program shows "key2" after
  // this call, i.e. this next() moves on to the second entry instead of
  // re-reading the first one.
  SPARKEY_ASSERT(sparkey_logiter_next(iterator, reader));
  wanted = sparkey_logiter_keylen(iterator);
  assert((buffer = malloc(wanted)));
  SPARKEY_ASSERT(sparkey_logiter_fill_key(
      iterator
    , reader
    , wanted
    , buffer
    , &actual
  ));

  printf("buffer: %s (after reset)\n", buffer);
  assert(0 == strcmp("key1", (char *) buffer));
  free(buffer);

  // cleanup
  sparkey_logiter_close(&iterator);
  sparkey_logreader_close(&reader);
  // NOTE(review): buffer was already freed a few lines above and not
  // reassigned, so this second free() is a double free.
  free(buffer);

  return 0;
}

Yields:

$ gcc -lsparkey reset.c -o reset -Wall -Wextra
$ ./reset
buffer: key1
buffer: key2 (after reset)
Assertion failed: (0 == strcmp("key1", (char *) buffer)), function main, file reset.c, line 98.
Abort trap: 6
spkrka commented 10 years ago

Thanks for the report! I will investigate.

spkrka commented 10 years ago

I think I know what the problem is now. You first call next() to get "key1", then you call reset() to make it point to "key1" again.

However, you follow that with calling next() which moves it to "key2".

When I remove the last next it works as expected.

There were also some minor bugs I had to fix in order to verify it: 1) the lengths should be 1 + strlen, since sparkey doesn't return zero-terminated strings, only the pure data you give it; 2) the last free() call is wrong - the buffer has already been freed.

spkrka commented 10 years ago

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sparkey/sparkey.h>
#include <assert.h>

/*
 * Check the return code of a sparkey call and abort the program on failure.
 *
 * `rc` is evaluated exactly once and stored in a local, so passing a call
 * expression (the normal usage) never re-runs the call while reporting the
 * error. On any value other than SPARKEY_SUCCESS the sparkey error string and
 * the source line of the macro invocation are written to stderr and the
 * process exits with status 1.
 *
 * The do { } while (0) wrapper makes the macro behave like a single
 * statement: the caller supplies the trailing `;` and the macro is safe in an
 * unbraced if/else.
 */
#define SPARKEY_ASSERT(rc) do {                        \
  const sparkey_returncode sparkey_assert_rc_ = (rc);  \
  if (SPARKEY_SUCCESS != sparkey_assert_rc_) {         \
    fprintf(                                           \
        stderr                                         \
      , "error: %s (%d)\n"                             \
      , sparkey_errstring(sparkey_assert_rc_)          \
      , __LINE__                                       \
    );                                                 \
    exit(1);                                           \
  }                                                    \
} while (0)

/*
 * Copy the key of the entry the iterator is currently positioned on into a
 * newly allocated buffer.
 *
 * The number of bytes actually filled in is stored in *length. The caller
 * owns the returned buffer and must free() it. Exits the program if the
 * allocation or the sparkey call fails.
 */
static uint8_t *
read_current_key(sparkey_logiter *iterator, sparkey_logreader *reader, uint64_t *length) {
  uint64_t wanted = sparkey_logiter_keylen(iterator);
  uint64_t actual = 0;
  // malloc(0) is allowed to return NULL, so always request at least one byte
  uint8_t *buffer = malloc(wanted ? (size_t) wanted : 1);

  // checked explicitly rather than inside assert(), which is removed by NDEBUG
  if (NULL == buffer) {
    fprintf(stderr, "error: out of memory (%d)\n", __LINE__);
    exit(1);
  }

  SPARKEY_ASSERT(sparkey_logiter_fill_key(
      iterator
    , reader
    , wanted
    , buffer
    , &actual
  ));

  *length = actual;
  return buffer;
}

/*
 * Writes two entries to "test.spl", reads the first key, calls
 * sparkey_logiter_reset() and reads the key of the same entry a second time
 * (without calling sparkey_logiter_next() in between), checking that both
 * reads yield "key1".
 *
 * Returns 0 on success; any sparkey failure exits with status 1.
 */
int
main(void) {
  sparkey_logwriter *writer = NULL;
  sparkey_logreader *reader = NULL;
  sparkey_logiter *iterator = NULL;
  const char *key1 = "key1";
  const char *value1 = "value1";
  // the sizes include the NUL terminator so that the stored bytes form
  // complete C strings
  size_t key1size = 1 + strlen(key1);
  size_t value1size = 1 + strlen(value1);
  const char *key2 = "key2";
  const char *value2 = "value2";
  size_t key2size = 1 + strlen(key2);
  size_t value2size = 1 + strlen(value2);
  uint64_t length = 0;
  uint8_t *buffer = NULL;

  // create a log
  SPARKEY_ASSERT(sparkey_logwriter_create(
      &writer
    , "test.spl"
    , SPARKEY_COMPRESSION_NONE
    , 0
  ));

  // write some stuff
  SPARKEY_ASSERT(sparkey_logwriter_put(
      writer
    , key1size
    , (uint8_t *) key1
    , value1size
    , (uint8_t *) value1
  ));
  SPARKEY_ASSERT(sparkey_logwriter_put(
      writer
    , key2size
    , (uint8_t *) key2
    , value2size
    , (uint8_t *) value2
  ));

  SPARKEY_ASSERT(sparkey_logwriter_close(&writer));

  SPARKEY_ASSERT(sparkey_logreader_open(&reader, "test.spl"));
  SPARKEY_ASSERT(sparkey_logiter_create(&iterator, reader));

  // get first key
  SPARKEY_ASSERT(sparkey_logiter_next(iterator, reader));
  buffer = read_current_key(iterator, reader, &length);

  // the precision bounds the print to the bytes that were actually read
  printf("buffer: %.*s\n", (int) length, (const char *) buffer);
  assert(length == key1size);
  assert(0 == memcmp(key1, buffer, key1size));
  free(buffer);
  buffer = NULL;

  // reset iterator to the start of the current entry so the key can be
  // read a second time
  SPARKEY_ASSERT(sparkey_logiter_reset(iterator, reader));

  buffer = read_current_key(iterator, reader, &length);

  printf("buffer: %.*s (after reset)\n", (int) length, (const char *) buffer);
  assert(length == key1size);
  assert(0 == memcmp(key1, buffer, key1size));
  free(buffer);
  buffer = NULL;

  // cleanup
  sparkey_logiter_close(&iterator);
  sparkey_logreader_close(&reader);

  return 0;
}
stephenmathieson commented 10 years ago

Thanks for the cleanup/fixes.

For clarification, reset doesn't change the position of the iterator? If so, what does it actually do?

> Resets the iterator to the start of the current entry.

Nevermind.

spkrka commented 10 years ago

The main use case for this function is internal, but it can also be useful if you want to read the key or value twice without doing any extra allocation.

For instance, sparkey_logiter_fill_key can't be run twice on the same entry without a call to reset in between.

stephenmathieson commented 10 years ago

I was thinking it'd be used for getting key/value chunks, but yes, multiple calls to fill too.

Thanks for the speedy responses :)