nextapps-de / flexsearch

Next-Generation full text search library for Browser and Node.js
Apache License 2.0
12.48k stars 491 forks source link

Trouble with import/export after upgrade from 0.6.30 to 0.7.31 #382

Open seansica opened 1 year ago

seansica commented 1 year ago

I am upgrading FlexSearch from v0.6.30 to the latest version, 0.7.31. I am testing import and export functionality in Node.

I want to write FlexSearch data to IndexedDB via export, and I want to import data from IndexedDB to new instances of FlexSearch.

My understanding is that the library was completely rewritten and many aspects of the API have changed. Our old application of the import/export functionality no longer works.

Here's a simplified summary of how we were importing/exporting before:

// v0.6.30 import/export implementation

// example initialization
const index = new FlexSearch({
  encode: "simple",     //phonetic normalizations
  tokenize: "forward",  //match substring beginning of word
  threshold: 2,         //exclude scores below this number
  resolution: 9,        //how many steps in the scoring algorithm
  depth: 4,             //how far around words to search for adjacent matches. Disabled for title
  doc: {
      id: "id",
      field: "data"
  }
});

// example import from IndexedDB to FlexSearch
// NOTE: the same INDEX_KEY constant is used for reading and writing; reading
// the literal string "INDEX_KEY" would look up a different entry than the one
// written by the export below. INDEX_KEY is assumed to be defined by the app.
localforage.getItem(INDEX_KEY).then((cachedIndexData) => {
    // nothing has been cached yet on a first run, so there is nothing to import
    if (cachedIndexData == null) {
        return;
    }
    index.import(cachedIndexData);
}).catch((err) => {
    console.error(err);
});

// example export from FlexSearch to IndexedDB
localforage.setItem(INDEX_KEY, index.export()).catch((err) => {
    console.error(err);
});

We've moved to the Document implementation because we are handling multiple indexes. Here is a working example:

const Dexie = require('dexie');
const FlexSearch = require('flexsearch');
const { indexedDB, IDBKeyRange } = require('fake-indexeddb');
const { Document } = FlexSearch;

// Key under which the serialized index is meant to be stored.
const INDEX_KEY = 'book-index-key';

// Dexie database backed by fake-indexeddb so the example can run in Node.
const indexeddb = new Dexie("BookDatabase", {
    indexedDB: indexedDB,
    IDBKeyRange: IDBKeyRange
});
indexeddb.version(1).stores({
    books: '++id, title, author, year',
    // The store is named `index` because the export code below writes to
    // `indexeddb.index`; a store declared under another name would not be
    // reachable through that property.
    index: '++key,data'
});

// Sample records used to seed the `books` store.
const bookData = [
    { id: 1, title: 'The Great Gatsby', author: 'F. Scott Fitzgerald', year: 1925 },
    { id: 2, title: 'Jane Eyre', author: 'Charlotte Bronte', year: 1847 },
    { id: 3, title: 'Pride and Prejudice', author: 'Jane Austen', year: 1813 },
    { id: 4, title: 'Nineteen Eighty-Four', author: 'George Orwell', year: 1949 },
    { id: 5, title: 'The Hobbit', author: 'J. R. R. Tolkien', year: 1937 }
];

// FlexSearch Document with two indexed fields: `title` and `author`.
const fsDocument = new Document({
    id: 'id',
    index: [
        {
            field: 'title',
            tokenize: 'forward',
            optimize: true
        },
        {
            field: 'author',
            tokenize: 'forward',
            optimize: true,
            minlength: 3,
            context: {
                depth: 3,
                resolution: 2,
            },
        },
    ],
});

/**
 * Seeds the `books` store with `bookData` and reads one record back.
 *
 * Wrapped in an async function because top-level `await` is not available in
 * a CommonJS (`require`) script.
 */
async function runBookTest() {
    // add book data to indexeddb; `forEach` with an async callback would not
    // wait for the writes, so collect the promises and await them together
    await Promise.all(bookData.map((book) => indexeddb.books.put(book)));

    // retrieve data by ID
    const result = await indexeddb.books.get(1);
    console.log(`Dexie Book Test: ${JSON.stringify(result)}\n`);
}

runBookTest().catch((err) => {
    console.error(err);
});

Export gave us a ton of trouble, however. No matter what we tried, the second console.log statement below -- the one that prints the export results -- always executed before fsExport was done.

console.log(`Exporting...`);
// Expected to settle only once every exported key has been stored; in the
// failing attempts it settled before the writes had finished.
const exportResult = await fsExport(fsDocument, indexeddb);
const exportSummary = JSON.stringify(exportResult);
console.log(`Export result: ${exportSummary}`);

// console log output below

// Export result: []
// Added key 'reg'
// Added key 'title.cfg'
// Added key 'title.map'
// Added key 'title.ctx'
// Added key 'author.cfg'
// Added key 'author.map'
// Added key 'author.ctx'
// Added key 'author.tag'
// Added key 'author.store'

We did end up managing to make it work. Here is the solution we devised as well as all of the other attempts that failed.

/**
 * Exports a FlexSearch Document into the `index` table of a Dexie database
 * and resolves once the expected number of parts has been written.
 *
 * `document.export` calls its handler once per exported part and gives no
 * signal when the last part has been delivered, so completion is detected by
 * counting successful writes against `totalKeys`.
 *
 * @param {object} document - FlexSearch Document to export.
 * @param {object} indexeddb - Dexie database exposing an `index` table.
 * @param {number} [totalKeys=9] - Number of parts the export is expected to
 *     emit. The default matches the two-field example, for which the observed
 *     count was (3 * number of indexed fields) + 3 = 9. That formula is an
 *     empirical observation, not a documented guarantee.
 * @returns {Promise<Array>} Keys returned by each `put`, in completion order.
 *     Rejects if `totalKeys` is not a positive integer, if `document.export`
 *     throws, or if any write fails.
 */
function fsExport(document, indexeddb, totalKeys = 9) {

    return new Promise((resolve, reject) => {
        if (!Number.isInteger(totalKeys) || totalKeys < 1) {
            throw new RangeError(`totalKeys must be a positive integer, got ${totalKeys}`);
        }

        const keys = [];

        document.export((key, data) => {
            // Returning the promise lets an export that honours promise-returning
            // handlers wait for this write before emitting the next part.
            return indexeddb.index.put({ key: key, data: data })
                .then((storedKey) => {
                    console.log(`Added key '${storedKey}'`);
                    keys.push(storedKey);

                    if (keys.length === totalKeys) {
                        resolve(keys);
                    }
                })
                // Without this a failed write would leave the promise pending forever.
                .catch(reject);
        });
    });

    // ALL NON-WORKING EXPORT ATTEMPTS BELOW THIS LINE 👇

    // return new Promise(async (resolve) => {
    //     const keys = [];
    //     const promises = [];

    //     const promisifiedExport = (callback) => {
    //         return new Promise((resolveExport) => {
    //             document.export((key, data) => {
    //                 const result = callback(key, data);
    //                 if (result instanceof Promise) {
    //                     promises.push(result);
    //                 }
    //                 resolveExport();
    //             });
    //         });
    //     };

    //     await promisifiedExport(async (key, data) => {
    //         const promise = indexeddb.index.put({ key: key, data: data }).then((key) => {
    //             console.log(`Added key '${key}'`);
    //             keys.push(key);
    //         });

    //         promises.push(promise);
    //     });

    //     await Promise.all(promises);
    //     resolve(keys);
    // });

    // return new Promise(async (resolve) => {
    //     const keys = [];
    //     const promises = [];

    //     document.export(function (key, data) {
    //         const promise = indexeddb.index.put({ key: key, data: data }).then((key) => {
    //             console.log(`Added key '${key}'`);
    //             keys.push(key);
    //         });

    //         promises.push(promise);
    //     });

    //     await Promise.all(promises);
    //     resolve(keys);
    // });

    // return new Promise(async (resolve) => {
    //     const keys = [];
    //     const promises = [];

    //     const asyncCallback = (key, data) => {
    //         const promise = indexeddb.index.put({ key: key, data: data }).then((key) => {
    //             console.log(`Added key '${key}'`);
    //             keys.push(key);
    //         });
    //         promises.push(promise);
    //         return promise;
    //     };

    //     document.export(asyncCallback);

    //     Promise.all(promises).then(() => {
    //         resolve(keys);
    //     });
    // });

    // return new Promise(async (resolve) => {
    //     const keys = [];
    //     await document.export(async function (key, data) {
    //         await indexeddb.index.put({ key: key, data: data }).then(key => {
    //             console.log(`Added key '${key}'`);
    //             keys.push(key);
    //             // return key;
    //         });
    //     });
    //     resolve(keys);
    // });

    // document.export(async function (key, data) {
    //     return await indexeddb.index.put({key: key, data: data}).then(async (key) => {
    //         console.log(`Added key '${key}'`);
    //         keys.push(key);
    //         const test = await indexeddb.index.get(key);
    //         console.log(test);
    //     });
    // });

    // document.export(async function(key, data) {
    //     return await indexeddb.index.put({key: key, data: data});
    //     // return new Promise(function(resolve) {
    //     //     indexeddb.index.put({key: key, data: data}).then(key => {
    //     //         console.log(`Added key '${key}'`);
    //     //         keys.push(key);
    //     //         resolve();
    //     //     });
    //     // });
    // });

    // try {
    //     const backups = [];

    //     await new Promise((resolve) => {
    //         document.export((key, data) => {
    //             const backup = { key: key, data: data };
    //             backups.push(backup);
    //         }).then(resolve);
    //     });

    //     await indexeddb.index.put(backups, INDEX_KEY);
    // } catch (err) {
    //     console.error(err);
    // }

    // const result = await document.export((key, data) => {
    //     console.log({key: key, data: data});
    //     keys.push(key);
    //     indexeddb.index.put({key: key, data: data});
    // });
    // return result;

    // await document.export(function (key, data) {
    //     return new Promise(async function (resolve) {
    //         // do the saving as async
    //         keys.push(key);
    //         console.log(key, data)
    //         await put(indexeddb, {key: key, data: data});
    //         resolve();
    //     });
    // });

    // await document.export(async (key, data) => {
    //     console.log({key: key, data: data})
    //     keys.push(key);
    //     await put(indexeddb, {key: key, data: data});
    // });
}

When it works, it outputs:

Exporting...
Added key 'reg'
Added key 'title.cfg'
Added key 'title.map'
Added key 'title.ctx'
Added key 'author.cfg'
Added key 'author.map'
Added key 'author.ctx'
Added key 'author.tag'
Added key 'author.store'
Export result: ["reg","title.cfg","title.map","title.ctx","author.cfg","author.map","author.ctx","author.tag","author.store"]

Since the document.export function is designed to work asynchronously using setTimeout, and it doesn't provide any clear indication when all of the keys have been processed, we had to devise a different approach.

The only potential problem with this solution is that it presupposes that we always know the value of totalKeys, which in this example is 9.

We did some testing and deduced that FlexSearch will always iterate (3 * numIndexes) + 3 times during an export call. Our Document index contains two indexes, so that's (3 * 2) + 3 = 9 as we can see in the output above, e.g., reg, title.cfg, title.map, etc.

Can we assume this is always true?

Are we missing something? Is there an easier way?

We've also yet to figure out an import solution (i.e., importing data from IndexedDB to FlexSearch), so any pointers there are appreciated.

zanzlender commented 1 year ago

I don't have all the answers you are looking for, but check out the issue I wrote, #384. I have a different problem, but I posted a mostly working solution for importing/exporting... Maybe that helps.