Poor recall/throughput perf vs. pgvector?

I tried comparing the performance of pgvectorscale (HEAD) with pgvector's HNSW (HEAD) with ann-benchmark and see the following recall/throughput comparison. The plot attached is for glove-100 dataset, and I see similar plot for deep-96. I've included the ann-benchmark code & config based on pgvectorscale recommended values below. Is this expected?

glove-100-angular

module.py

import sys

import pgvector.psycopg
import psycopg

from ..base.module import BaseANN

class TimescaleDiskANN(BaseANN):
    def __init__(self, metric, method_param):
        self._metric = metric
        self._m = method_param['M']
        self._ef_construction = method_param['efConstruction']
        self._cur = None

        if metric == "angular":
            self._query = "SELECT id FROM items ORDER BY embedding <=> %s LIMIT %s"
        elif metric == "euclidean":
            self._query = "SELECT id FROM items ORDER BY embedding <-> %s LIMIT %s"
        else:
            raise RuntimeError(f"unknown metric {metric}")

    def fit(self, X):
        conn = psycopg.connect(host="localhost", user="postgres", password="postgres", dbname="postgres", autocommit=True)
        pgvector.psycopg.register_vector(conn)
        cur = conn.cursor()
        cur.execute("DROP TABLE IF EXISTS items")
        cur.execute("CREATE TABLE items (id int, embedding vector(%d))" % X.shape[1])
        cur.execute("ALTER TABLE items ALTER COLUMN embedding SET STORAGE PLAIN")
        print("copying data...")
        with cur.copy("COPY items (id, embedding) FROM STDIN") as copy:
            for i, embedding in enumerate(X):
                copy.write_row((i, embedding))
        print("creating index...")
        if self._metric == "angular":
            print("CREATE INDEX ON items USING diskann (embedding) WITH (num_neighbors = %d, search_list_size = %d)" % (self._m, self._ef_construction))
            cur.execute(
                "CREATE INDEX ON items USING diskann (embedding) WITH (num_neighbors = %d, search_list_size = %d)" % (self._m, self._ef_construction)
            )
        elif self._metric == "euclidean":
            cur.execute("CREATE INDEX ON items USING diskann (embedding) WITH (num_neighbors = %d, search_list_size = %d)" % (self._m, self._ef_construction))
        else:
            raise RuntimeError(f"unknown metric {self._metric}")
        print("done!")
        self._cur = cur

    def set_query_arguments(self, query_search_list_size):
        self.query_search_list_size, self.query_rescore = query_search_list_size
        self._cur.execute("SET diskann.query_search_list_size = %d" % self.query_search_list_size)
        self._cur.execute("SET diskann.query_rescore = %d" % self.query_rescore)

    def query(self, v, n):
        self._cur.execute(self._query, (v, n), binary=True, prepare=True)
        return [id for id, in self._cur.fetchall()]

    def get_memory_usage(self):
        if self._cur is None:
            return 0
        self._cur.execute("SELECT pg_relation_size('items_embedding_idx')")
        return self._cur.fetchone()[0] / 1024

    def __str__(self):
      return f"TimescaleDiskANN(num_neighbors={self._m}, search_list_size={self._ef_construction}, query_search_list_size={self.query_search_list_size}, query_rescore={self.query_rescore})"

config.yml

float:
  any:
  - base_args: ['@metric']
    constructor: TimescaleDiskANN
    disabled: false
    docker_tag:
    module: ann_benchmarks.algorithms.timescale_diskann
    name: timescale_diskann
    run_groups:
      M-50:
        arg_groups: [{M: 50, efConstruction: 100}]
        args: {}
        query_args: [[[35, 115], [35, 200], [35, 300], [35, 400], [50, 115], [50, 200], [50, 300], [50, 400], [75, 115], [75, 200], [75, 300], [75, 400], [100, 115], [100, 200], [100, 300], [100, 400]]]
      M-100:
        arg_groups: [{M: 100, efConstruction: 200}]
        args: {}
        query_args: [[[35, 115], [35, 200], [35, 300], [35, 400], [50, 115], [50, 200], [50, 300], [50, 400], [75, 115], [75, 200], [75, 300], [75, 400], [75, 400], [100, 115], [100, 200], [100, 300], [100, 400]]]

timescale / pgvectorscale

Poor recall/throughput perf vs. pgvector? #116