Closed ahaapple closed 4 months ago
The code
import { EmbeddingModel, FlagEmbedding } from "fastembed"; const embeddingModel = await FlagEmbedding.init({ model: EmbeddingModel.BGEBaseEN, }); import { sqeuclidean, cosine, inner, hamming, jaccard } from "simsimd"; let documents = [ // "passage: Hello, World!", // "query: Hello, World!", "passage: fastembed-js", // You can leave out the prefix but it's recommended "passage: fastembed-js is ", ]; console.time("Embedding time"); const embeddings = embeddingModel.passageEmbed(documents, 2); //Optional batch size. Defaults to 256 console.timeEnd("Embedding time"); for await (const batch of embeddings) { console.log(batch); const vec1 = new Float32Array(batch[0]); const vec2 = new Float32Array(batch[1]); console.time("cosine"); const distance = cosine(vec1, vec2); console.timeEnd("cosine"); console.time("cosineSimilarity"); const distance2 = cosineSimilarity(vec1, vec2); console.timeEnd("cosineSimilarity"); console.log("cosine Distance:", distance); console.log("cosine Distance:2", distance2); } function cosineSimilarity(vecA: Float32Array, vecB: Float32Array) { let dotProduct = 0; let magnitudeA = 0; let magnitudeB = 0; for (let i = 0; i < vecA.length; i++) { dotProduct += vecA[i] * vecB[i]; magnitudeA += vecA[i] ** 2; magnitudeB += vecB[i] ** 2; } magnitudeA = Math.sqrt(magnitudeA); magnitudeB = Math.sqrt(magnitudeB); // shouldn't be zero, but just in case return dotProduct / (magnitudeA * magnitudeB); }
[0.03ms] cosine [0.16ms] cosineSimilarity cosine Distance: 0.005549426190555096 cosine Distance:2 0.9963965251444867
Check (1 - cosineSimilarity(...)).
The code