-- Baseline benchmark table: one row per document, holding its title, its
-- text content and a 3072-dimensional embedding. This is the table whose
-- HNSW index is built without a quantization section.
CREATE TABLE dbpedia_3072 (
    id        BIGSERIAL PRIMARY KEY,  -- auto-generated surrogate key
    title     TEXT,
    content   TEXT,
    embedding VECTOR(3072)            -- fixed dimension of 3072
);
-- HNSW index over dbpedia_3072.embedding using the cosine operator class.
-- The options value is a TOML document; its bytes are kept exactly as in the
-- quoted-string form (no leading newline, one trailing newline), only the
-- quoting style differs (dollar-quoting avoids escaping inside the TOML).
CREATE INDEX hnsw_cosine_index_dbpedia_3072
    ON dbpedia_3072
    USING vectors (embedding vector_cos_ops)
    WITH (options = $$[indexing.hnsw]
m = 16
ef_construction = 100
$$);
-- Second benchmark table with the same column layout as dbpedia_3072.
-- It exists so that an HNSW index with a scalar quantization section can be
-- built on it and compared against the baseline table.
CREATE TABLE dbpedia_3072_scalar (
    id        BIGSERIAL PRIMARY KEY,  -- auto-generated surrogate key
    title     TEXT,
    content   TEXT,
    embedding VECTOR(3072)            -- fixed dimension of 3072
);
-- HNSW index over dbpedia_3072_scalar.embedding using the cosine operator
-- class. Same m / ef_construction as the baseline index; the only difference
-- is the extra (empty) [indexing.hnsw.quantization.scalar] TOML table.
-- The options bytes are kept exactly as in the quoted-string form; only the
-- quoting style differs.
CREATE INDEX hnsw_cosine_index_dbpedia_3072_scalar
    ON dbpedia_3072_scalar
    USING vectors (embedding vector_cos_ops)
    WITH (options = $$[indexing.hnsw]
m = 16
ef_construction = 100
[indexing.hnsw.quantization.scalar]
$$);
Simulate 100 users, each querying with random 3072-dimensional vectors to find the most similar embedding.
-- Query template for the baseline table: returns the single row with the
-- smallest embedding distance, reporting 1 minus that distance as
-- cosine_similarity. The two placeholders are filled in by the client;
-- presumably both receive the same query vector (confirm in the test driver).
SELECT
    1 - (embedding <=> %s) AS cosine_similarity,
    title,
    content
FROM dbpedia_3072
ORDER BY embedding <=> %s
LIMIT 1
-- Query template for the scalar-quantized table: returns the single row with
-- the smallest embedding distance, reporting 1 minus that distance as
-- cosine_similarity. The two placeholders are filled in by the client;
-- presumably both receive the same query vector (confirm in the test driver).
SELECT
    1 - (embedding <=> %s) AS cosine_similarity,
    title,
    content
FROM dbpedia_3072_scalar
ORDER BY embedding <=> %s
LIMIT 1
Result
dbpedia_3072
dbpedia_3072_scalar
avg
13.66 ms
12.46 ms
rps
732.4
765.5
max cpu / memory
max cpu 330% / 1033MB
max cpu 270% / 1300MB
index stat
-- Lists, for each row of the pg_vector_index_stat view, the table name,
-- index status, index size and the options the index was created with.
SELECT
    tablename,
    idx_status,
    idx_size,
    idx_options
FROM pg_vector_index_stat
Environment
Docker image: tensorchord/pgvecto-rs:pg16-v0.3.0
Data preparation
Dataset: https://huggingface.co/datasets/Qdrant/dbpedia-entities-openai3-text-embedding-3-large-3072-100K
Test
Simulate 100 users, each querying with random 3072-dimensional vectors to find the most similar embedding.
Result
index stat
-- Lists, for each row of the pg_vector_index_stat view, the table name,
-- index status, index size and the options the index was created with.
SELECT
    tablename,
    idx_status,
    idx_size,
    idx_options
FROM pg_vector_index_stat