Open chaucerling opened 1 month ago
Since I'm also interested in this, I'm adding some additional info here:
const { QdrantClient } = require('@qdrant/qdrant-js');
const { pipeline } = require('@xenova/transformers');
const client = new QdrantClient({ url: "http://localhost:6333" });
/**
 * Creates a collection with a named dense vector plus a named sparse vector.
 *
 * NOTE: in Qdrant, sparse vectors are configured under `sparse_vectors`, not
 * inside `vectors` — they have no fixed `size`/`distance` (scoring is always
 * dot product over the provided indices/values). The original config with
 * `sparse: { size, distance }` under `vectors` is rejected by the server.
 */
async function createCollectionWithMultipleVectors() {
  await client.createCollection('text_collection', {
    vectors: {
      dense: {
        size: 1024, // BGE-M3 dense vector dimension
        distance: 'Cosine'
      }
    },
    sparse_vectors: {
      sparse: {} // sparse vectors take no size/distance configuration
    },
    on_disk_payload: true
  });
}
// Lazily-initialized, cached pipeline promise: loading the BGE-M3 model is
// expensive (download + weight init), so do it once and reuse it across calls
// instead of rebuilding it on every embedding request.
let bgeM3PipelinePromise = null;

/**
 * Generates dense + sparse embeddings for a text using BGE-M3.
 * @param {string} text - Input text to embed.
 * @returns {Promise<{dense: number[], sparse: {indices: number[], values: number[]}}>}
 */
async function generateEmbeddings(text) {
  bgeM3PipelinePromise ??= pipeline('feature-extraction', 'Xenova/bge-m3');
  const pipe = await bgeM3PipelinePromise;
  const result = await pipe(text, { pooling: 'mean', normalize: true });
  // NOTE(review): the stock transformers.js feature-extraction pipeline returns
  // a single Tensor; the `dense_embeddings`/`sparse_embeddings` fields assume a
  // custom BGE-M3 pipeline output — confirm against the actual model output.
  return {
    dense: Array.from(result.dense_embeddings.data),
    sparse: {
      indices: Array.from(result.sparse_embeddings.indices),
      values: Array.from(result.sparse_embeddings.values)
    }
  };
}
/**
 * Embeds a text with BGE-M3 and upserts it with both named vectors.
 * @param {string} text - Text to embed and store.
 */
async function insertTextWithMultipleVectors(text) {
  const embeddings = await generateEmbeddings(text);
  await client.upsert('text_collection', {
    wait: true,
    points: [
      {
        // Qdrant point IDs must be unsigned integers or UUID strings; a
        // stringified timestamp like "1712..." is rejected. Use the numeric
        // timestamp directly. (This can collide on same-millisecond inserts —
        // prefer crypto.randomUUID() in production.)
        id: Date.now(),
        vector: {
          dense: embeddings.dense,
          sparse: embeddings.sparse
        },
        payload: {
          'text': text
        }
      }
    ]
  });
}
/**
 * Hybrid search: queries both the dense and sparse named vectors and fuses the
 * two result lists with Reciprocal Rank Fusion (RRF).
 *
 * NOTE: `client.search` accepts a single (named) vector per request, so the
 * original `{ dense, sparse }` shape is invalid; the Query API's `prefetch` +
 * `fusion` is the supported way to combine dense and sparse in one call.
 *
 * @param {string} text - Query text.
 * @param {number} [limit=5] - Maximum number of hits to return.
 * @returns {Promise<object[]>} Scored points (with payload), like `search` returned.
 */
async function hybridSearch(text, limit = 5) {
  const embeddings = await generateEmbeddings(text);
  const searchResult = await client.query('text_collection', {
    prefetch: [
      { query: embeddings.dense, using: 'dense', limit },
      { query: embeddings.sparse, using: 'sparse', limit }
    ],
    query: { fusion: 'rrf' },
    limit,
    with_payload: true
  });
  // `query` wraps the hits in `{ points }`; return the array so existing
  // callers that map over the result keep working.
  return searchResult.points;
}
/**
 * Demo entry point: creates the collection, then inserts one sample text.
 * Errors from either step are reported, not rethrown.
 */
async function main() {
  try {
    await createCollectionWithMultipleVectors();
    console.log("Successfully created the collection");

    const text = "Text example";
    await insertTextWithMultipleVectors(text);
    console.log("Successfully inserted text");
  } catch (error) {
    // Typo fixed in the message: "errror" -> "error".
    console.error("There was an error:", error);
  }
}
main();
const { QdrantClient } = require('@qdrant/qdrant-js');
const { pipeline } = require('@xenova/transformers');
const Anthropic = require('@anthropic-ai/sdk');
const qdrantClient = new QdrantClient({ url: "http://localhost:6333" });
// Read the API key from the environment — never hardcode credentials in source.
const anthropic = new Anthropic({
  apiKey: process.env.ANTHROPIC_API_KEY
});
// Produces the BGE-M3 dense and sparse embeddings for a piece of text.
// Returns { dense: number[], sparse: { indices, values } }.
async function generateEmbeddings(text) {
  const extractor = await pipeline('feature-extraction', 'Xenova/bge-m3');
  const output = await extractor(text, { pooling: 'mean', normalize: true });

  const dense = Array.from(output.dense_embeddings.data);
  const sparse = {
    indices: Array.from(output.sparse_embeddings.indices),
    values: Array.from(output.sparse_embeddings.values)
  };
  return { dense, sparse };
}
/**
 * Hybrid search over the dense and sparse named vectors, fused with RRF.
 *
 * NOTE: `search` accepts only a single (named) vector per request, so the
 * original `{ dense, sparse }` vector object is invalid; use the Query API
 * with `prefetch` + `fusion` instead.
 *
 * @param {string} text - Query text.
 * @param {number} [limit=5] - Maximum number of hits to return.
 * @returns {Promise<object[]>} Scored points with payload.
 */
async function hybridSearch(text, limit = 5) {
  const embeddings = await generateEmbeddings(text);
  const searchResult = await qdrantClient.query('text_collection', {
    prefetch: [
      { query: embeddings.dense, using: 'dense', limit },
      { query: embeddings.sparse, using: 'sparse', limit }
    ],
    query: { fusion: 'rrf' },
    limit,
    with_payload: true
  });
  // Return the hit array so callers can `.map(result => result.payload.text)`.
  return searchResult.points;
}
/**
 * Sends a context-grounded question to Claude and returns the answer text.
 * @param {string} context - Retrieved documents joined into one string.
 * @param {string} question - The user's question.
 * @returns {Promise<string>} Text of Claude's first content block.
 */
async function askClaude(context, question) {
  const prompt = `Context: ${context}\n\nQuestion: ${question}\n\nPlease answer the question based on the given context.`;
  const response = await anthropic.messages.create({
    model: 'claude-3-opus-20240229',
    max_tokens: 1000,
    messages: [{ role: 'user', content: prompt }]
  });
  const [firstBlock] = response.content;
  return firstBlock.text;
}
// Retrieval-augmented answering: run the hybrid search, build a context from
// the hits' payload text, and ask Claude. Falls back to an apology on error.
async function searchAndAskClaude(query) {
  try {
    const searchResults = await hybridSearch(query);
    const context = searchResults
      .map((result) => result.payload.text)
      .join('\n\n');
    return await askClaude(context, query);
  } catch (error) {
    console.error("There was an error:", error);
    return "Sorry there was an error.";
  }
}
// Usage example: ask one sample question and print Claude's answer.
async function main() {
  const query = "Example question?";
  const response = await searchAndAskClaude(query);
  console.log("Claude answer:", response);
}
main();
So far it seems Ollama (which supports bge-m3) is not able to output its sparse vectors, and Qdrant's own FastEmbed doesn't support bge-m3 yet; I hope llama.cpp is able to extract them.
LangChain already has the classes to manage sparse embeddings and hybrid search: https://api.python.langchain.com/en/latest/qdrant_api_reference.html Next is another code example using LangChain.
const { QdrantClient } = require("@qdrant/qdrant-js");
const { SparseEmbeddings } = require("langchain-qdrant/sparse_embeddings");
const { BGEM3Embeddings } = require("langchain/embeddings/bge_m3");
const { QdrantVectorStore } = require("langchain/vectorstores/qdrant");
/**
 * Adapter exposing BGE-M3 sparse output through LangChain's SparseEmbeddings
 * interface, delegating the actual embedding work to a wrapped BGEM3Embeddings.
 */
class BGEM3SparseEmbeddings extends SparseEmbeddings {
  constructor(config = {}) {
    super();
    this.embeddings = new BGEM3Embeddings(config);
  }

  // Embeds a batch of documents, keeping only the sparse (indices/values) part.
  async embedDocuments(texts) {
    const results = await this.embeddings.embedDocuments(texts);
    return results.map(({ indices, values }) => ({ indices, values }));
  }

  // Embeds a single query string into a sparse vector.
  async embedQuery(text) {
    const { indices, values } = await this.embeddings.embedQuery(text);
    return { indices, values };
  }
}
/**
 * End-to-end demo: create a hybrid (dense + sparse) collection, index a few
 * documents, run a hybrid search via the Query API, then a LangChain
 * dense-only similarity search against the same collection.
 */
async function main() {
  const denseEmbedder = new BGEM3Embeddings();
  const sparseEmbedder = new BGEM3SparseEmbeddings();
  const qdrant = new QdrantClient({ url: "http://localhost:6333" });

  // Create a collection for hybrid embeddings.
  // NOTE: sparse vectors are declared under `sparse_vectors`, not `vectors` —
  // they take no size/distance, only an optional index configuration.
  await qdrant.createCollection("hybrid_collection", {
    vectors: {
      dense: {
        size: 1024,
        distance: "Cosine",
      },
    },
    sparse_vectors: {
      sparse: {
        index: {
          on_disk: true,
        },
      },
    },
  });

  const docs = [
    "First document",
    "Second document",
    "Third document"
  ];

  // The two embedding passes are independent — run them in parallel.
  const [denseEmbeddings, sparseEmbeddings] = await Promise.all([
    denseEmbedder.embedDocuments(docs),
    sparseEmbedder.embedDocuments(docs),
  ]);

  // Insert all documents in ONE batched upsert (one round trip instead of one
  // request per document). Point IDs must be unsigned integers or UUID
  // strings — "0"/"1" string IDs are rejected, so use the integer index.
  await qdrant.upsert("hybrid_collection", {
    wait: true,
    points: docs.map((text, i) => ({
      id: i,
      vector: {
        dense: denseEmbeddings[i],
        sparse: sparseEmbeddings[i],
      },
      payload: { text },
    })),
  });

  // Execute the hybrid search. `search` takes a single vector per call, so
  // combine dense + sparse through the Query API: one prefetch per named
  // vector, fused with Reciprocal Rank Fusion.
  const query = "Query sample";
  const [denseQueryEmbedding, sparseQueryEmbedding] = await Promise.all([
    denseEmbedder.embedQuery(query),
    sparseEmbedder.embedQuery(query),
  ]);
  const searchResult = await qdrant.query("hybrid_collection", {
    prefetch: [
      { query: denseQueryEmbedding, using: "dense", limit: 5 },
      { query: sparseQueryEmbedding, using: "sparse", limit: 5 },
    ],
    query: { fusion: "rrf" },
    limit: 5,
  });
  console.log("Hybrid search results:", searchResult.points);

  // Use LangChain for a similar search (dense embeddings only).
  const vectorStore = await QdrantVectorStore.fromExistingCollection(
    denseEmbedder,
    {
      client: qdrant,
      collectionName: "hybrid_collection",
    }
  );
  const langchainResults = await vectorStore.similaritySearch(query, 5);
  console.log("LangChain results:", langchainResults);
}
main().catch(console.error);
Describe the feature you'd like: Qdrant supports hybrid search — https://qdrant.tech/articles/hybrid-search/