Closed MlLearnerAkash closed 2 years ago
Thanks @Confusezius .
@MlLearnerAkash
def _create_index(self, dimension)
...
class FaissNN(object):
    """Nearest-neighbour search backed by a flat L2 FAISS index.

    When ``on_gpu`` is true, indexes are cloned to the GPUs through
    ``faiss.index_cpu_to_all_gpus``; otherwise they stay on the CPU.
    """

    def __init__(self, on_gpu: bool = False, num_workers: int = 4) -> None:
        """FAISS Nearest neighbourhood search.

        Args:
            on_gpu: If set true, nearest neighbour searches are done on GPU.
            num_workers: Number of workers to use with FAISS for similarity search.
        """
        faiss.omp_set_num_threads(num_workers)
        # Report how many GPUs FAISS detects.
        ngpus = faiss.get_num_gpus()
        print("number of GPUs (use faiss):", ngpus)
        self.on_gpu = on_gpu
        self.search_index = None

    def _gpu_cloner_options(self):
        """Return default GPU cloner options.

        Not called by any other method of this class; kept so that existing
        callers or subclasses that reference it keep working.
        """
        return faiss.GpuClonerOptions()

    def _index_to_gpu(self, index):
        """Return ``index`` cloned to all GPUs if ``on_gpu``, else unchanged."""
        if self.on_gpu:
            return faiss.index_cpu_to_all_gpus(index)
        return index

    def _index_to_cpu(self, index):
        """Return a CPU copy of ``index`` if ``on_gpu``, else ``index`` itself."""
        if self.on_gpu:
            return faiss.index_gpu_to_cpu(index)
        return index

    def _create_index(self, dimension):
        """Create an empty flat L2 index for vectors of size ``dimension``.

        The index is built on the CPU and then handed to ``_index_to_gpu`` so
        that GPU placement is decided in a single place.
        """
        return self._index_to_gpu(faiss.IndexFlatL2(dimension))

    def fit(self, features: np.ndarray) -> None:
        """Adds features to the FAISS search index.

        Any previously fitted index is reset and replaced.

        Args:
            features: Array of size NxD.
        """
        # reset_index() is a no-op when no index exists yet.
        self.reset_index()
        self.search_index = self._create_index(features.shape[-1])
        self._train(self.search_index, features)
        self.search_index.add(features)

    def _train(self, _index, _features):
        """Training hook; intentionally does nothing in this class."""
        pass

    def run(
        self,
        n_nearest_neighbours,
        query_features: np.ndarray,
        index_features: np.ndarray = None,
    ) -> Union[np.ndarray, np.ndarray, np.ndarray]:
        """Returns distances and indices of nearest neighbour search.

        Args:
            n_nearest_neighbours: Number of neighbours to retrieve per query.
            query_features: Features to retrieve.
            index_features: [optional] Index features to search in. If omitted,
                the index built by ``fit`` (or ``load``) is searched.

        Returns:
            Whatever the index's ``search`` call returns (distances and
            indices of the nearest neighbours).
        """
        if index_features is None:
            return self.search_index.search(query_features, n_nearest_neighbours)

        # Build a search index just for this search.
        search_index = self._create_index(index_features.shape[-1])
        self._train(search_index, index_features)
        search_index.add(index_features)
        return search_index.search(query_features, n_nearest_neighbours)

    def save(self, filename: str) -> None:
        """Write the current search index to ``filename`` (via a CPU copy)."""
        faiss.write_index(self._index_to_cpu(self.search_index), filename)

    def load(self, filename: str) -> None:
        """Read an index from ``filename`` and make it the search index."""
        self.search_index = self._index_to_gpu(faiss.read_index(filename))

    def reset_index(self):
        """Reset and drop the current search index, if there is one."""
        # Compare against None explicitly instead of relying on the
        # truthiness of the wrapped index object.
        if self.search_index is not None:
            self.search_index.reset()
            self.search_index = None
...
@TheWangYang Thanks for sharing the update to the common.py script. Can you also share what else you changed to train on multiple GPUs? Ideally, it would be great if you could point to a repo with the updated files, if possible.
The code currently does not support multi-gpu usage explicitly, and what you could do depends on what you want to do:
`faiss`: consider checking out the `faiss` repo in more detail; from what I know, they do provide multi-GPU support (https://github.com/facebookresearch/faiss/blob/main/tutorial/python/5-Multiple-GPUs.py)