ashvardanian / SimSIMD

Up to 200x Faster Dot Products & Similarity Metrics — for Python, Rust, C, JS, and Swift, supporting f64, f32, f16 real & complex, i8, and bit vectors using SIMD for both AVX2, AVX-512, NEON, SVE, & SVE2 📐
https://ashvardanian.com/posts/simsimd-faster-scipy/
Apache License 2.0
913 stars 51 forks source link

Wrong cosine result in js version #144

Closed ahaapple closed 3 months ago

ahaapple commented 3 months ago

The code

// Reproduction script: embeds two passages with fastembed, then compares the
// value returned by simsimd's `cosine` with a hand-written `cosineSimilarity`.
import { EmbeddingModel, FlagEmbedding } from "fastembed";

// Initialise the embedding model once, using top-level await.
const embeddingModel = await FlagEmbedding.init({
  model: EmbeddingModel.BGEBaseEN,
});

// Only `cosine` is used below; the other imported metrics are unused in this script.
import { sqeuclidean, cosine, inner, hamming, jaccard } from "simsimd";

// The two active entries are the passages whose embeddings get compared.
let documents = [
  // "passage: Hello, World!",
  // "query: Hello, World!",
  "passage: fastembed-js",
  // You can leave out the prefix but it's recommended
  "passage: fastembed-js is ",
];

// NOTE(review): `embeddings` is consumed with `for await` below, so this timer
// presumably covers only the `passageEmbed` call itself, not the embedding
// work done during iteration — confirm against fastembed's behaviour.
console.time("Embedding time");
const embeddings = embeddingModel.passageEmbed(documents, 2); //Optional batch size. Defaults to 256
console.timeEnd("Embedding time");

for await (const batch of embeddings) {
  console.log(batch);
  // Copy the first two entries of the batch into Float32Arrays, the type that
  // `cosineSimilarity` below is declared to accept.
  const vec1 = new Float32Array(batch[0]);
  const vec2 = new Float32Array(batch[1]);

  // Value computed by simsimd.
  // NOTE(review): per the maintainer's reply in this thread, this should be
  // compared against `1 - cosineSimilarity(...)`, i.e. it is presumably a
  // distance rather than a similarity — confirm against the simsimd docs.
  console.time("cosine");
  const distance = cosine(vec1, vec2);
  console.timeEnd("cosine");

  // Value computed by the plain-TypeScript reference implementation.
  console.time("cosineSimilarity");
  const distance2 = cosineSimilarity(vec1, vec2);
  console.timeEnd("cosineSimilarity");

  // `distance2` holds the result of `cosineSimilarity` (a similarity), even
  // though the label printed for it says "Distance".
  console.log("cosine Distance:", distance);
  console.log("cosine Distance:2", distance2);
}

/**
 * Computes the cosine similarity of two vectors: the dot product divided by
 * the product of their Euclidean norms.
 *
 * Note that this is a *similarity* (1 for vectors pointing the same way), not
 * a distance; the corresponding cosine distance is `1 - cosineSimilarity(a, b)`.
 *
 * @param vecA - First vector. Its length determines how many components are read.
 * @param vecB - Second vector. Expected to have the same length as `vecA`;
 *   this is not checked.
 * @returns The cosine similarity, or `0` when either vector has zero
 *   magnitude (including empty input), where the ratio is undefined.
 */
function cosineSimilarity(vecA: Float32Array, vecB: Float32Array): number {
  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;

  for (let i = 0; i < vecA.length; i++) {
    const a = vecA[i];
    const b = vecB[i];
    dotProduct += a * b;
    squaredNormA += a ** 2;
    squaredNormB += b ** 2;
  }

  const denominator = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);

  // A zero-magnitude vector would otherwise produce 0 / 0 = NaN.
  if (denominator === 0) {
    return 0;
  }

  return dotProduct / denominator;
}
[0.03ms] cosine
[0.16ms] cosineSimilarity
cosine Distance: 0.005549426190555096
cosine Distance:2 0.9963965251444867
ashvardanian commented 3 months ago

Check (1 - cosineSimilarity(...)): `cosine` returns the cosine distance, not the cosine similarity.