nextapps-de / flexsearch

Next-Generation full text search library for Browser and Node.js
Apache License 2.0
12.33k stars 489 forks source link

Imported index doesn't return enriched results #358

Closed alejoar closed 8 months ago

alejoar commented 1 year ago

Demo:

Create and export an index:

const fs = require("fs");
const { Document } = require("flexsearch");

/**
 * Builds a small FlexSearch `Document` index, logs an enriched search against
 * it, and writes the exported index parts to `search-test.json`.
 *
 * @returns {Promise<void>} Resolves once the export file has been written.
 */
const exportIndex = async () => {
  // Create and store an index
  const index = new Document({
    document: {
      id: "id",
      index: ["content"],
      store: true,
    },
    tokenize: "forward",
  });

  const data = [
    { id: 1, content: "I am foo" },
    { id: 2, content: "I am bar" },
    { id: 3, content: "I am baz" },
  ];

  // Adding documents is a pure side effect, so iterate instead of using map().
  for (const item of data) {
    index.add(item);
  }

  console.log(
    "search original index:",
    JSON.stringify(index.search("foo", { enrich: true }))
  );

  const dataToExport = [];

  // Each exported part is collected as a single-key object. An undefined value
  // is replaced by the string "null" (presumably so the key is not dropped by
  // JSON.stringify, which omits undefined properties).
  await index.export((key, value) => {
    dataToExport.push({ [key]: value === undefined ? "null" : value });
  });

  // Give it a second for the callback to finish since it runs async
  await new Promise((resolve) => setTimeout(resolve, 1000));
  fs.writeFileSync("search-test.json", JSON.stringify(dataToExport), {
    encoding: "utf8",
  });
};

Now import it:

/**
 * Creates a fresh `Document` index with the same configuration as the exported
 * one, imports every part found in `search-test.json`, and logs an enriched
 * search against the imported index.
 *
 * @returns {Promise<void>} Resolves after the search result has been logged.
 */
const importIndex = async () => {
  // Create a new index into which we are going to load the previous exported index
  const loadedIndex = new Document({
    document: {
      id: "id",
      index: ["content"],
      store: true,
    },
    tokenize: "forward",
  });

  // The file holds an array of single-key objects, one per exported part.
  const indexData = JSON.parse(fs.readFileSync("search-test.json", "utf8"));
  for (const part of indexData) {
    for (const [key, value] of Object.entries(part)) {
      loadedIndex.import(key, value);
    }
  }

  console.log(
    "search imported index:",
    JSON.stringify(loadedIndex.search("foo", { enrich: true }))
  );
};

Run everything:

/**
 * Runs the export step and then the import step, in that order.
 *
 * @returns {Promise<void>} Resolves when both steps have completed.
 */
const run = async () => {
  await exportIndex();
  await importIndex();
};

// Handle rejections explicitly instead of leaving the promise floating.
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Output:

search original index: [{"field":"content","result":[{"id":1,"doc":{"id":1,"content":"I am foo"}}]}]
search imported index: [{"field":"content","result":[{"id":1}]}]
alejoar commented 1 year ago

Looks like tags also get lost after import

maxhoffmann commented 1 year ago

I’m running into the same issue and can confirm it

z88kat commented 1 year ago

The only solution I found was to store the original data set and restore using index.add

thexeos commented 1 year ago

There is a bug in the way exportDocument works: while store and tag are both global and point to individual documents, they are exported within the scope of a field (i.e. an individual index). Then, when re-imported, they are not handled as global in importDocument and are instead passed to the importIndex method, where they are discarded.

I've created a PR that addresses this issue.

zanzlender commented 1 year ago

I'm running into the same issue when normally adding Document records:

// Parse the transcript file. JSON.parse is synchronous, so no await is needed.
const transcriptsJson = JSON.parse(file) as Array<{
  id: number;
  url: string;
  transcript: Array<{
    timestamp: string;
    transcript: string;
  }>;
}>;

const flexIndex = new Document({
  tokenize: "full",
  document: {
    id: "record:id",
    index: ["record:url", "record:transcript[]:transcript"],
    // `enrich: true` can only attach documents that the index has stored;
    // without a store the search results contain ids only.
    store: true,
  },
});

// Wrap every video under a `record` key so it matches the "record:*" field paths.
transcriptsJson.forEach((video) => {
  flexIndex.add({
    record: {
      id: video.id,
      url: video.url,
      transcript: [...video.transcript],
    },
  });
});

const results = flexIndex.search("youtube", {
  limit: 5,
  suggest: true,
  index: ["record:url", "record:transcript[]:transcript"],
  enrich: true,
});
console.log(results);

which returns

image

Exter-dg commented 1 year ago

Unable to import content.store. Please fix this... The library isn't useful when one cannot retrieve previously stored data...