nextapps-de / flexsearch

Next-Generation full text search library for Browser and Node.js
Apache License 2.0
12.49k stars 491 forks source link

Is there a way to create an offline index in PHP or Python? #442

Open ddofborg opened 4 months ago

ddofborg commented 4 months ago

I would like to use FS on the client side, but precompute the index offline, so the client can start fast. Is there a way to do so?

donbowman commented 1 month ago

Here's my WIP for WordPress, doing it offline. It's not finished.

npm install flexsearch
npm install html-to-text
const { convert } = require('html-to-text');
const flexsearch = require('flexsearch');

/**
 * Turn an HTML fragment into plain text.
 *
 * Delegates to `convert` with a fixed wrap width of 130.
 *
 * @param {string} body - HTML markup to convert.
 * @returns {*} Whatever `convert` returns for the given markup.
 */
function h2t(body) {
    return convert(body, { wordwrap: 130 });
}

/**
 * Fetch one page of FAQ entries and add each of them to the index.
 *
 * Every entry's rendered title, excerpt and content are converted to plain
 * text with `h2t` before being indexed. The entry's own `id` is carried over
 * so the index has a key to store the document under.
 *
 * @param {object} index - Index exposing an `add(doc)` method.
 * @param {string} url - URL of the page to fetch.
 * @returns {Promise<number>} Number of entries added from this page; 0 when
 *     the response status is not OK.
 */
async function getFAQ(index, url) {
    const response = await fetch(url);
    if (!response.ok) {
        // A non-OK answer (e.g. a page number past the last page) is treated
        // as an empty page so that the caller's pagination loop stops.
        return 0;
    }

    const faqs = await response.json();
    let num = 0;
    for (const faq of faqs) {
        num += 1;
        const title = h2t(faq.title.rendered);
        console.log(title);
        index.add({
            // Without an id the document cannot be keyed in the index.
            id: faq.id,
            title,
            excerpt: h2t(faq.excerpt.rendered),
            content: h2t(faq.content.rendered),
        });
    }
    return num;
}

/**
 * Walk through all pages of the FAQ endpoint and index every entry.
 *
 * Pages are requested one after another, starting at page 1, until a page
 * comes back with fewer entries than were asked for. The requested page size
 * and the "full page" check use the same value, so pagination does not stop
 * early after the first page.
 *
 * @param {object} index - Index that `getFAQ` adds the entries to.
 * @param {string} url - Absolute URL of the FAQ collection endpoint.
 * @param {number} [perPage=50] - Number of entries requested per page.
 * @returns {Promise<void>}
 */
async function getFAQS(index, url, perPage = 50) {
    // Start out "full" so that the first page is always requested.
    let num = perPage;
    for (let page = 1; num === perPage; page++) {
        console.log("do page ", page, "num = ", num);
        const pageUrl = new URL(url);
        pageUrl.searchParams.set("per_page", String(perPage));
        pageUrl.searchParams.set("page", String(page));
        num = await getFAQ(index, pageUrl.href);
    }
}

/**
 * Create the FlexSearch document index and fill it from the FAQ endpoint.
 *
 * The index stores and indexes three fields: "title" (forward tokenizer) plus
 * "excerpt" and "content" (strict tokenizer with a context block).
 *
 * @returns {Promise<object>} The index, after `getFAQS` has finished.
 */
async function createIndex() {
    // Settings that appear on the index itself and on every indexed field.
    const shared = { optimize: true, resolution: 9 };

    // "excerpt" and "content" share one configuration; a factory gives each
    // field its own fresh option objects.
    const strictField = (field) => ({
        field,
        tokenize: "strict",
        ...shared,
        minlength: 3,
        context: { depth: 1, resolution: 3 },
    });

    const options = {
        tokenize: "forward",
        ...shared,
        cache: 100,
        worker: true,
        document: {
            id: "id",
            tag: "tag",
            store: ["title", "excerpt", "content"],
            index: [
                { field: "title", tokenize: "forward", ...shared },
                strictField("excerpt"),
                strictField("content"),
            ],
        },
    };
    const index = new flexsearch.Document(options);

    // Paging parameters are appended by getFAQS.
    await getFAQS(index, "https://www.agilicus.com/wp-json/wp/v2/ufaq");
    return index;
}

//    https://www.agilicus.com/wp-json/wp/v2/ufaq?per_page=100
//const index = await createIndex();

// Entry point: build the index, then run one sample query against it and
// print the result.
(async () => {
    console.log('before start');
    const index = await createIndex();
    console.log('after start');
    const result = await index.search('connector', 10);
    console.log(result);
})().catch((err) => {
    // Report the failure and signal it through the exit code instead of
    // leaving the rejection unhandled.
    console.error(err);
    process.exitCode = 1;
});