facebookresearch / faiss

A library for efficient similarity search and clustering of dense vectors.
https://faiss.ai
MIT License
31.64k stars 3.65k forks source link

Implement add_preassigned for GPU #3908

Open mdouze opened 1 month ago

mdouze commented 1 month ago

The GPU implementation supports search_preassigned but not add_preassigned, which is required when an arbitrary quantizer is used.

https://github.com/facebookresearch/faiss/blob/main/faiss/gpu/GpuIndexIVF.h#L101

It would be useful to implement it.

asadoughi commented 1 month ago

Do you have a specific use case in mind that this functionality would unblock?

mlomeli1 commented 3 weeks ago

Hi @asadoughi, I have created a toy example to reproduce the problem and to showcase how our work in the research team could benefit from this functionality:

# Example: add_preassigned() not supported for gpu indexes
#
# Minimal reproduction: build a CPU IVF-Flat index, compute list assignments
# with a separate ("alternative") k-means quantizer, clone the index to the
# GPU, then try to add the vectors with those precomputed assignments via
# faiss.contrib.ivf_tools.add_preassigned(). The final call is the one that
# fails.
import os
import time  # NOTE: not used anywhere in this snippet

# NOTE(review): this import runs before CUDA_VISIBLE_DEVICES is set below, and
# ivf_tools itself uses the faiss module — confirm the environment variable
# still takes effect with this ordering.
from faiss.contrib import ivf_tools

# Restrict which CUDA devices this process can see; placed ahead of the
# `import faiss` / `import torch` lines that follow.
os.environ["CUDA_VISIBLE_DEVICES"] = "3,4,5"
import faiss
# Imported for its side effects only (no name from it is referenced below).
# Presumably this is what lets the faiss calls below accept torch tensors and
# return tensors — TODO confirm.
import faiss.contrib.torch_utils
import numpy as np
import torch

current_cache_index = 0  # NOTE: not used anywhere in this snippet
head_dim = 128  # dimensionality of the vectors stored in the index
nlist = 1024  # number of inverted lists requested in the factory string
nprobe = 512  # value assigned to the IVF index's nprobe below
factory_str = f"IVF{nlist},Flat"
faiss_gpu_resources = faiss.StandardGpuResources()
co = faiss.GpuClonerOptions()
total_samples = 10000000  # number of random vectors generated below
co.use_raft = False  # explicitly turn off the cloner's use_raft flag

# Random float32 data of shape (total_samples, head_dim), placed on "cuda".
xk = torch.rand(total_samples, head_dim).to(device="cuda", dtype=torch.float32)
faiss_index = faiss.index_factory(head_dim, factory_str, faiss.METRIC_INNER_PRODUCT)
faiss_index.by_residual = False
faiss.extract_index_ivf(faiss_index).nprobe = nprobe
# Flag the index as trained by hand: train() is never called on faiss_index in
# this script, because the assignments come from the separate quantizer below.
faiss_index.is_trained = True
# Alternative quantizer: k-means fitted on only the first 20 columns of xk
# (moved to CPU). Presumably the arguments are (d=20, k=50) — TODO confirm.
# NOTE(review): if so, assignments fall in [0, 50) although the index was
# created with nlist = 1024 lists.
km = faiss.Kmeans(20, 50)
km.train(xk[:, :20].cpu())
alt_quantizer = km.index
# Nearest-centroid lookup (k=1) for every vector; element [1] of the search
# result is taken and flattened — presumably the label array, used later as
# the per-vector list assignment.
a = alt_quantizer.search(xk[:, :20].cpu(), 1)[1].ravel()
# (optional) fake coarse quantizer
# Fill the index's own quantizer with nlist all-zero centroids of width
# head_dim; the real assignments are the precomputed `a` above.
fake_centroids = np.zeros((nlist, head_dim), dtype="float32")
faiss_index.quantizer.add(fake_centroids)
# Clone the CPU index to the GPU, using the device index that xk lives on.
gpu_index = faiss.index_cpu_to_gpu(
    faiss_gpu_resources,
    xk.device.index,
    faiss_index,
    co,
)
# Failing call: per the traceback in this report, add_preassigned() invokes
# index_ivf.add_core(...), and GpuIndexIVFFlat has no attribute 'add_core',
# so an AttributeError is raised here.
ivf_tools.add_preassigned(gpu_index, xk.cpu().numpy(), a.cpu().numpy())

Error:

AttributeError:'GpuIndexIVFFlat' object has no attribute 'add_core'
--------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/tmp/ipykernel_1833376/3474978841.py in ?()
     39     xk.device.index,
     40     faiss_index,
     41     co,
     42 )
---> 43 ivf_tools.add_preassigned(gpu_index, xk.cpu().numpy(), a.cpu().numpy())
/mnt/xarfuse/uid-226154/f1d542b2-seed-nspid4026531836_cgpid40078521-ns-4026531841/faiss/contrib/ivf_tools.py in ?(index_ivf, x, a, ids)
     20     assert d == index_ivf.d
     21     if ids is not None:
     22         assert ids.shape == (n, )
     23         ids = faiss.swig_ptr(ids)
---> 24     index_ivf.add_core(
     25         n, faiss.swig_ptr(x), ids, faiss.swig_ptr(a)
     26     )
/mnt/xarfuse/uid-226154/f1d542b2-seed-nspid4026531836_cgpid40078521-ns-4026531841/swigfaiss_gpu.py in ?(self, name)
> 11320     __getattr__ = lambda self, name: _swig_getattr(self, GpuIndexIVFFlat, name)
/mnt/xarfuse/uid-226154/f1d542b2-seed-nspid4026531836_cgpid40078521-ns-4026531841/swigfaiss_gpu.py in ?(self, class_type, name)
     68         return self.this.own()
     69     method = class_type.__swig_getmethods__.get(name, None)
     70     if method:
     71         return method(self)
---> 72     raise AttributeError("'%s' object has no attribute '%s'" % (class_type.__name__, name))
AttributeError: 'GpuIndexIVFFlat' object has no attribute 'add_core'