kriszyp / lmdb-js

Simple, efficient, ultra-fast, scalable data store wrapper for LMDB
Other
505 stars 42 forks source link

txn.del does not delete an entry on dup sorted dbi #29

Closed kylebernhardy closed 3 years ago

kylebernhardy commented 3 years ago

I am in the process of migrating our code base from node-lmdb to lmdb-store and am currently using the base-level functionality to ease the migration, as that is what our current code base uses. I plan on ultimately using the worker-thread transactions, but in our current code I noticed that txn.del does not delete entries from dupSort dbis. I verified that deletion does work with store.remove and in node-lmdb, and that entries in non-dupSort dbis are removed correctly. See the code below:

const fs = require('fs');
const path = require('path');
const lmdb = require('lmdb-store');

/**
 * Creates an empty directory at `<__dirname>/data/<name>` and opens an LMDB
 * environment on it.
 *
 * Any directory already present at that path is removed first, so the
 * environment always starts out empty.
 *
 * @param {string} name - Directory name (under `data/`) for the environment.
 * @returns {lmdb.Env} The opened environment.
 */
function createEnv(name) {
    const environment_path = path.join(__dirname, 'data', name);
    try {
        // `force` turns a missing directory into a no-op instead of an ENOENT
        // throw that would skip the mkdir below; `recursive` on mkdir also
        // creates the parent `data/` directory when it does not exist yet.
        fs.rmSync(environment_path, { recursive: true, force: true });
        fs.mkdirSync(environment_path, { recursive: true });
    } catch (e) {
        // Best effort: report the failure; env.open() below is still attempted.
        console.error(e);
    }
    const env_init = {
        "path": environment_path,
        "mapSize": 1024 * 1024,
        "maxReaders": 100,
        "maxDbs": 10
    };
    const env = new lmdb.Env();
    env.open(env_init);
    return env;
}

// Environment used by the reproduction script and by iterateDBI.
const env = createEnv('test');

// 'name' database: opened with dupSort enabled.
const name_dbi = env.openDbi({ name: 'name', create: true, dupSort: true });

// 'age' database: opened with dupSort disabled, for comparison.
const age_dbi = env.openDbi({ name: 'age', create: true, dupSort: false });

// Reproduction: write entries with putString, dump both databases, delete
// some entries with txn.del, then dump both databases again.
(() => {
    // [dbi, key, value] rows written in the first transaction, in this order.
    const rowsToWrite = [
        [name_dbi, 'Kyle', '1'],
        [name_dbi, 'Jerry', '1'],
        [name_dbi, 'Jerry', '2'],
        [age_dbi, 47, '1'],
        [age_dbi, 37, '2'],
    ];
    const writeTxn = env.beginTxn();
    for (const [dbi, key, value] of rowsToWrite) {
        writeTxn.putString(dbi, key, value);
    }
    writeTxn.commit();

    // Contents before any deletion.
    iterateDBI(name_dbi);
    iterateDBI(age_dbi);

    // Argument lists for txn.del: the dupSort database is given key and value,
    // the plain database only the key.
    const deletions = [
        [name_dbi, 'Kyle', '1'],
        [name_dbi, 'Jerry', '2'],
        [age_dbi, 47],
    ];
    const deleteTxn = env.beginTxn();
    for (const delArgs of deletions) {
        deleteTxn.del(...delArgs);
    }
    deleteTxn.commit();

    // Contents after the deletions.
    iterateDBI(name_dbi);
    iterateDBI(age_dbi);
})();

/**
 * Logs a '---' separator, then walks `dbi` from the first entry with a cursor
 * in a read-only transaction and logs each key with its current string value.
 *
 * @param {*} dbi - Database handle passed to the `lmdb.Cursor` constructor.
 */
function iterateDBI(dbi) {
    console.log('---');
    const readTxn = env.beginTxn({ readOnly: true });
    const cursor = new lmdb.Cursor(readTxn, dbi);

    let key = cursor.goToFirst();
    // `!= null` ends the walk on either null or undefined.
    while (key != null) {
        console.log(key, cursor.getCurrentString());
        key = cursor.goToNext();
    }

    cursor.close();
    readTxn.abort();
}

generated output is:

---
Jerry 1
Jerry 2
Kyle 1
---
37 2
47 1
---
Jerry 1
Jerry 2
Kyle 1
---
37 2

No entries from the dupsorted dbi are removed.

kriszyp commented 3 years ago

This is because I made the default string encoding in del UTF-8, whereas putString uses UTF-16 encoding, so putString produces different binary data that doesn't match what del looks for (whereas putUtf8 would match del). I did this because lmdb-store prefers encoding strings as UTF-8 (using putUtf8 instead of putString), and that's the default for handling raw strings, object serialization, etc. Of course del, as inherited from node-lmdb, is rather ambiguous with respect to value serialization and doesn't have all the type-specific serializations of put{String|Binary|Boolean|Number|Utf8}. I suppose I could add an explicit delUtf8 and revert the del handling back to the node-lmdb behavior for better back-compat.

kylebernhardy commented 3 years ago

No need for you to do anything; I can easily adjust our code, as it is well centralized. Thank you for the insight. I will use putUtf8 / getCurrentUtf8.