I found possible typo (uint16_t instead of uint64_t) and a place for one possible small optimization: there is no need to call put() if you edited value in-place (if the size of value not changed).
diff --git a/tally-ngrams.c b/tally-ngrams.c
index 40fd82c..4460035 100644
--- a/tally-ngrams.c
+++ b/tally-ngrams.c
@@ -69,7 +69,7 @@ void emit_ngram(char *start_ptr, char *end_ptr)
key.size = (size_t)(end_ptr - start_ptr);
// All values will be 64-bit unsigned integers
- value.size = sizeof(uint16_t);
+ value.size = sizeof(uint64_t);
if (transaction == NULL)
{
@@ -84,6 +84,7 @@ void emit_ngram(char *start_ptr, char *end_ptr)
// We've seen this key before, increment its value
if (DEBUG)
printf("result retrieved: %zu\n", *(uint64_t *)value.data);
+ // Edit value in-place
(*(uint64_t *)value.data)++;
}
else
@@ -93,11 +94,10 @@ void emit_ngram(char *start_ptr, char *end_ptr)
value.data = (uint64_t *)&initialValue;
if (DEBUG)
printf("result initialized: %zu\n", *(uint64_t *)value.data);
+ // Add new key-value pair
+ transaction->put(transaction, &key, &value);
}
- // Store off the result
- transaction->put(transaction, &key, &value);
-
total_ngrams_emitted++;
}
I found possible typo (uint16_t instead of uint64_t) and a place for one possible small optimization: there is no need to call
put()
if you edited value in-place (if the size of value not changed).