mongodb backend
import fiftyone as fo
import fiftyone.brain as fob
import fiftyone.zoo as foz
dataset = foz.load_zoo_dataset(
"quickstart",
max_samples=10,
dataset_name="zzz",
drop_existing_dataset=True,
)
view = dataset[:5]
# optional: precompute embeddings and store them in an "embeddings" field
model = foz.load_zoo_model("clip-vit-base32-torch")
embeddings = view.compute_embeddings(model)
view.set_values("embeddings", embeddings.tolist())
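# compute_similarity() below loads these precomputed embeddings from the
# "embeddings" field rather than recomputing them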
index = fob.compute_similarity(
view,  # passing a view rather than the full dataset is not recommended, but it works
model="clip-vit-base32-torch",
backend="mongodb",
brain_key="img_sim",
embeddings="embeddings",
)
print(index.total_index_size) # 5
print(index.index_size) # 5
# index is created even though embeddings already existed
assert index._index
# issues warnings for skipped IDs
embeddings, sample_ids, _ = index.compute_embeddings(dataset[3:6])
index.add_to_index(embeddings, sample_ids, overwrite=False, warn_existing=True)
print(index.total_index_size) # 6
print(index.index_size) # 5
# skips existing but no warnings
embeddings, sample_ids, _ = index.compute_embeddings(dataset[5:])
index.add_to_index(embeddings, sample_ids, overwrite=False)
print(index.total_index_size) # 10
print(index.index_size) # 5
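# restrict the index to a two-sample view; the total index size is unchanged,
# but only two indexed samples are in scope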
del_view = dataset[4:6]
index.use_view(del_view)
print(index.total_index_size) # 10
print(index.index_size) # 2
# works even though some IDs are not in the index's original view
index.remove_from_index(sample_ids=del_view.values("id"))
print(index.total_index_size) # 8
print(index.index_size) # 0
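# switch back to the full dataset; all remaining indexed samples are in scope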
index.use_view(dataset)
print(index.total_index_size) # 8
print(index.index_size) # 8
assert index.ready
# issues a warning rather than raising an error
puppies = dataset.take(5).sort_by_similarity("puppies")
# expected warning: "The MongoDB backend does not yet support views; the full index
# will instead be queried, which may result in fewer matches in your current view"
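Optional teardown, as a minimal sketch assuming the `index` and `img_sim` brain run created above:
# delete the backend's vector search index and the brain run record
index.cleanup()
dataset.delete_brain_run("img_sim")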
sklearn backend
import fiftyone as fo
import fiftyone.brain as fob
import fiftyone.zoo as foz
dataset = foz.load_zoo_dataset(
"quickstart",
max_samples=10,
dataset_name="zzz",
drop_existing_dataset=True,
)
view = dataset[:5]
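# unlike above, no embeddings are precomputed here; compute_similarity() will
# compute them with the model and store them in the "embeddings" field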
index = fob.compute_similarity(
view,  # passing a view rather than the full dataset is not recommended, but it works
model="clip-vit-base32-torch",
backend="sklearn",
brain_key="img_sim",
embeddings="embeddings",
)
print(index.total_index_size) # 5
print(index.index_size) # 5
# issues warnings for skipped IDs
embeddings, sample_ids, _ = index.compute_embeddings(dataset[3:6])
index.add_to_index(embeddings, sample_ids, overwrite=False, warn_existing=True)
print(index.total_index_size) # 6
print(index.index_size) # 5
# skips existing but no warnings
embeddings, sample_ids, _ = index.compute_embeddings(dataset[5:])
index.add_to_index(embeddings, sample_ids, overwrite=False)
print(index.total_index_size) # 10
print(index.index_size) # 5
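# restrict the index to a two-sample view; the total index size is unchanged,
# but only two indexed samples are in scope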
del_view = dataset[4:6]
index.use_view(del_view)
print(index.total_index_size) # 10
print(index.index_size) # 2
# works even though some IDs are not in the index's original view
index.remove_from_index(sample_ids=del_view.values("id"))
print(index.total_index_size) # 8
print(index.index_size) # 0
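# switch back to the full dataset; all remaining indexed samples are in scope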
index.use_view(dataset)
print(index.total_index_size) # 8
print(index.index_size) # 8
puppies = dataset.take(5).sort_by_similarity("puppies")
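Either index can be reloaded later by its brain key; a minimal sketch assuming the `img_sim` run above:
# reload the similarity index from its brain run and re-query
index = dataset.load_brain_results("img_sim")
puppies = dataset.sort_by_similarity("puppies", k=5, brain_key="img_sim")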
Fixes issues identified in https://github.com/voxel51/fiftyone-brain/pull/168 and https://github.com/voxel51/fiftyone-brain/pull/170