facebookresearch / faiss

A library for efficient similarity search and clustering of dense vectors.
https://faiss.ai
MIT License
30.87k stars 3.6k forks source link

GpuIndexIVFPQ unreasonably slow on GIST1M #1980

Closed DeMoriarty closed 2 years ago

DeMoriarty commented 3 years ago

Summary

Search speed of GpuIndexIVFPQ on the GIST1M dataset is unexpectedly slow. Is there something wrong with my config? I used the exact same configuration to benchmark on SIFT1M, where the search speed is 10x to 100x faster than on GIST1M.

Platform

GPU: Tesla T4

OS: Ubuntu 18.04.5 LTS

Faiss version: faiss-gpu-1.7.1.post2

Installed from: pip

Faiss compilation options: -

Running on:

Interface:

Reproduction instructions

Here is the dataset loader:

import os
import tarfile
from pathlib import Path
from urllib.request import urlretrieve

import numpy as np

class BaseDataset(object):
    """Abstract interface for an ANN benchmark dataset.

    Subclasses override the accessors below; each vecs_* method is
    expected to return an (N, D) np.array or a generator over vectors.
    """

    def __init__(self, path):
        self.path = Path(path)

    def download(self):
        """Fetch the raw dataset files into self.path (no-op by default)."""
        pass

    def vecs_train(self):
        """Training vectors: an (N, D) np.array or a generator."""
        pass

    def vecs_base(self):
        """Database (base) vectors: an (N, D) np.array or a generator."""
        pass

    def vecs_query(self):
        """Query vectors: an (N, D) np.array or a generator."""
        pass

    def groundtruth(self):
        """Ground-truth neighbor ids: an (N, k) np.array or a generator."""
        pass

    def D(self):
        """Vector dimensionality, taken from the training split."""
        return self.vecs_train().shape[1]

class Gist1m(BaseDataset):
  """GIST1M (texmex) dataset: 1M 960-d GIST descriptors.

  Downloads and unpacks gist.tar.gz from the INRIA texmex corpus into
  `path` and exposes the train/base/query/groundtruth splits.
  """

  def __init__(self, path, device="cuda:0"):
    # `device` is only stored here; this class itself does file I/O only.
    self.device = device
    super().__init__(path=path)

  def __str__(self):
    return "Gist1m(path={})".format(self.path)

  def ivecs_read(self, fname):
    """Read a texmex .ivecs file into an (N, d) int32 array.

    Each record is one int32 `d` followed by d int32 components.
    """
    a = np.fromfile(fname, dtype='int32')
    d = a[0]  # dimensionality is the first int of the first record
    result = a.reshape(-1, d + 1)[:, 1:].copy()
    return result

  def fvecs_read(self, fname):
    """Read a texmex .fvecs file into an (N, d) float32 array.

    Same record layout as .ivecs; parsing as int32 and reinterpreting
    the payload as float32 is the standard texmex trick (the dimension
    header parses identically either way).
    """
    a = np.fromfile(fname, dtype='int32')
    d = a[0]
    result = a.reshape(-1, d + 1)[:, 1:].copy().view("float32")
    return result

  def download(self):
    """Download the archive if missing, then extract it if not yet done."""
    self.path.mkdir(exist_ok=True, parents=True)
    tar_path = self.path / "gist.tar.gz"
    if not tar_path.exists():
      print("downloading")
      urlretrieve("ftp://ftp.irisa.fr/local/texmex/corpus/gist.tar.gz", tar_path)

    # BUGFIX: the original called os.path.exists without importing os
    # (NameError at runtime); use pathlib (already imported) and reuse
    # tar_path instead of rebuilding the archive path.
    if not (self.path / "gist").exists():
      with tarfile.open(tar_path, 'r:gz') as f:
        f.extractall(path=self.path)

  def _existing(self, relpath):
    """Resolve a file under self.path, asserting that it exists."""
    vec_path = self.path / relpath
    assert vec_path.exists()
    return str(vec_path)

  def vecs_train(self):
    return self.fvecs_read(fname=self._existing("gist/gist_learn.fvecs"))

  def vecs_base(self):
    return self.fvecs_read(fname=self._existing("gist/gist_base.fvecs"))

  def vecs_query(self):
    return self.fvecs_read(fname=self._existing("gist/gist_query.fvecs"))

  def groundtruth(self):
    return self.ivecs_read(fname=self._existing("gist/gist_groundtruth.ivecs"))

# Instantiate the dataset handle and make sure the raw files are on disk.
dataset = Gist1m(path="/content")
dataset.download()

Here is the test code:

# IVF-PQ configuration: 4096 coarse cells, 8-byte PQ codes per vector.
nlists = 4096
m = 8

gpu_resources = faiss.StandardGpuResources()
index = faiss.index_factory(d_vector, "IVF{},PQ{}".format(nlists, m))
cloner_options = faiss.GpuClonerOptions()
if m >= 48:
  # Wide PQ codes only fit on the GPU with fp16 lookup tables enabled.
  cloner_options.useFloat16 = True
  cloner_options.useFloat16LookupTables = True
index = faiss.index_cpu_to_gpu(gpu_resources, 0, index, cloner_options)
index.train(trainset)

# Add the base vectors to the index in fixed-size chunks so only one
# slice of the base set is handed to faiss at a time.
n_splits = 100
n_data = baseset.shape[0]
chunk_size = n_data // n_splits
for i in range(n_splits):
  print(f"{i} / {n_splits}")
  start = i * chunk_size
  # BUGFIX: let the final chunk run to n_data. The original used
  # (i+1)*chunk_size for every chunk, silently dropping the last
  # n_data % n_splits vectors whenever the split was not exact.
  end = n_data if i == n_splits - 1 else (i + 1) * chunk_size
  sub_baseset = baseset[start:end, :]
  index.add(sub_baseset)
  del sub_baseset  # release the slice before the next iteration

# Sweep nprobe and k; report search throughput (queries/s) and
# recall@k = fraction of queries whose true nearest neighbor
# (groundtruth column 0) appears among the k returned ids.
for n_probe in [2**i for i in range(8)]:
  print("\nn_probe", n_probe)
  index.setNumProbes(n_probe)
  for k in [1, 10, 100]:
    # BUGFIX: the original set tm = 0, so (time() - tm) measured seconds
    # since the Unix epoch rather than elapsed search time, making every
    # reported q/s figure meaningless. Start the clock here instead.
    tm = time()
    for i in range(30):
      topkv, topki = index.search(queryset, k=k)
    search_time = (time() - tm) / 30
    print(f"q/s @ {k}",  queryset.shape[0] / search_time)
    recall =  (groundtruth[:, 0][:, None] == topki[:, :k]).sum(axis=-1)
    recall = recall.astype("float32").mean()
    print(f"recall@{k}", recall)
| d | m | nlists | distance | n_probe | query/s@1 | recall@1 | query/s@10 | recall@10 | query/s@100 | recall@100 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 960 | 8 | 4096 | euclidean | 1 | 3197.7 | 0.081 | 3203.4 | 0.148 | 2860.502 | 0.202 |
| 960 | 8 | 4096 | euclidean | 2 | 3136.337 | 0.084 | 3160.92 | 0.174 | 2827.389 | 0.273 |
| 960 | 8 | 4096 | euclidean | 4 | 3102.01 | 0.087 | 3097.997 | 0.205 | 2773.464 | 0.375 |
| 960 | 8 | 4096 | euclidean | 8 | 2970.457 | 0.089 | 2942.642 | 0.22 | 2650.819 | 0.442 |
| 960 | 8 | 4096 | euclidean | 16 | 2745.254 | 0.086 | 2746.905 | 0.228 | 2477.426 | 0.474 |
| 960 | 8 | 4096 | euclidean | 32 | 2399.624 | 0.089 | 2380.958 | 0.234 | 2169.055 | 0.491 |
| 960 | 8 | 4096 | euclidean | 64 | 1917.804 | 0.089 | 1919.943 | 0.233 | 1768.085 | 0.498 |
| 960 | 8 | 4096 | euclidean | 128 | 1389.42 | 0.089 | 1379.489 | 0.233 | 1299.476 | 0.506 |
mdouze commented 3 years ago

could you try to figure out if this is related to the vector dimensionality, ie. crop all input vectors to 128D ?