Summary

While running multigpu search its only utilizing one gpu compute, but I can see the VRAM usage for other gpus

Running on:

[ ] CPU
[x] GPU

Interface:

[ ] C++
[x] Python

Reproduction instructions

import faiss
import numpy as np
import time
from tqdm import tqdm

def match_faiss_clustered_multi(embeddings, k, threshold, method_params):
    build_start = time.time()
    quantizer = faiss.IndexFlatIP(embeddings.shape[1])  # the other index
    n_list = method_params["n_list_ratio"] * int(np.sqrt(len(embeddings)))
    faiss_index = faiss.IndexIVFFlat(quantizer, embeddings.shape[1], n_list)
    faiss_index.metric_type = faiss.METRIC_INNER_PRODUCT
    # assert not index.is_trained
    faiss_index.train(embeddings)
    # assert index.is_trained

    faiss_index.add(embeddings)  # add may be a bit slower as well
    if method_params["gpu"]:
        # create a CPU index
        gpu_index = faiss.index_cpu_to_all_gpus(
                faiss_index
        )  # transfer the index to GPU
        faiss_index = gpu_index
    faiss_index.nprobe = method_params["n_probe"]

    build_end = time.time()

    n_neighbours = []

    start_time = time.time()
    for emb in tqdm(embeddings):
        dist, array_knn_indices_gpu = faiss_index.search(emb.reshape(1, -1), k)
        possibly_same_ids = array_knn_indices_gpu[dist > threshold]
        # minus searched vector itself
        n_neighbours.append(len(possibly_same_ids) - 1)
    end_time = time.time()

    hist, _ = np.histogram(n_neighbours, bins=(k + 1) // 2, density=True)

    return end_time - start_time, build_end - build_start

k = 100
xb = np.random.randn(3000000, 384).astype(np.float32)
print(match_faiss_clustered_multi(xb, k, 0.1, {"n_probe": 20, "n_list_ratio": 4, "gpu": True}))

facebookresearch / faiss

Only one gpu is getting utilized for Multi gpu search #2345

Summary

Reproduction instructions