Stevenic / vectra

Vectra is a local vector database for Node.js with features similar to pinecone but built using local files.
MIT License
321 stars 29 forks source link

Local embedding using transformers.js, no score #13

Closed dspasyuk closed 10 months ago

dspasyuk commented 10 months ago

Hi guys, I am trying to generate embeddings locally using the transformers.js library and the Xenova/all-MiniLM-L6-v2 model. Embedding seems to work fine, but I get no score when querying the data. Any help is appreciated! Here is the code example:

`import { LocalIndex } from 'vectra'; import { dirname, join } from 'path'; import { fileURLToPath } from 'url'; const __dirname = dirname(fileURLToPath(import.meta.url)); import { pipeline } from '@xenova/transformers';

/**
 * Namespace holder for the demo. The function body is intentionally empty;
 * the helpers (init, addItem, indexCreate, query) are attached to it as
 * properties and invoked as `vdb.<name>()`.
 */
function vdb() {}

/**
 * Sets up the demo end to end: opens the local index under `<__dirname>/db`,
 * makes sure the index exists, loads the feature-extraction pipeline,
 * inserts a handful of sample words and finally runs one sample query.
 *
 * @returns {Promise<void>} Resolves once the sample query has been logged.
 */
vdb.init = async function () {
  this.index = new LocalIndex(join(__dirname, 'db'));

  // indexCreate is async: wait for it so the index is guaranteed to exist
  // before the first insert instead of racing against it.
  await this.indexCreate();

  this.getVector = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');

  const samples = ['apple', 'orange', 'red', 'blue', 'green', 'cherry', 'apricot', 'peanut', 'tomatoe'];
  // Insert one at a time, in order, exactly as the original sequence of calls did.
  for (const sample of samples) {
    await this.addItem(sample);
  }

  await this.query("banana");
};

/**
 * Embeds `text` with mean pooling and normalization, then stores the
 * resulting vector in the index with the original text as metadata.
 *
 * @param {string} text - The text to embed and insert.
 * @returns {Promise<void>}
 */
vdb.addItem = async function (text) {
  const embedding = await this.getVector(text, { pooling: 'mean', normalize: true });
  // The index receives a plain array copy of the embedding's `data`.
  const vector = Array.from(embedding.data);
  await this.index.insertItem({ vector, metadata: { text } });
};

/**
 * Creates the index if it does not exist yet and logs "Created" when it
 * does so; does nothing when the index is already present.
 *
 * @returns {Promise<void>}
 */
vdb.indexCreate = async function () {
  const alreadyExists = await this.index.isIndexCreated();
  if (alreadyExists) {
    return;
  }
  await this.index.createIndex();
  console.log("Created");
};

/**
 * Embeds `text` and logs the three closest items from the index, or
 * "No results found." when the index returns nothing.
 *
 * @param {string} text - The query text.
 * @returns {Promise<void>}
 */
vdb.query = async function (text) {
  // Embed the query with the same pooling/normalize options that addItem
  // uses for stored items; calling getVector(text) without them is what
  // produced `score: NaN` in this snippet.
  const embedding = await this.getVector(text, { pooling: 'mean', normalize: true });
  const results = await this.index.queryItems(Array.from(embedding.data), 3);

  if (results.length === 0) {
    console.log('No results found.');
    return;
  }

  for (const result of results) {
    console.log(result);
  }
};

vdb.init();`

And here is the output: { item: { id: '8ab26839-b40c-4b2c-992d-34d30fe46413', metadata: { text: 'red' }, vector: [.........0.14675785601139069, 0.02359885536134243, -0.512697696685791, -0.7379060983657837, 1.8219693899154663, ... 284 more items ], norm: 7.798753068646911 }, score: NaN }

dspasyuk commented 10 months ago

Never mind, the issue was here — the query text has to be embedded with the same options as the stored items:

await this.getVector(text, { pooling: 'mean', normalize: true, });