Closed databyjp closed 8 months ago
When querying a single-tenant collection that has cross-references to a multi-tenant (MT) collection, the query fails if a filter is specified.
Can you provide a full example? AFAIK, references from a non-MT collection to an MT collection should not be possible.
Ahhhh. It was an interesting experience trying to reproduce it.
If I'm using this correctly, when I add those incorrect refs using `batch.add_reference`, the client doesn't show me any errors even though it should.
Environment: image `semitechnologies/weaviate:1.23.10` + Python client 4.4.4
import weaviate
import weaviate.classes.config as wc
from weaviate.util import generate_uuid5
from weaviate.classes.tenants import Tenant
import os
# Connect to a local Weaviate instance, forwarding the model-provider API keys
# read from the environment as request headers.
api_key_headers = {
    "X-OpenAI-Api-Key": os.getenv("OPENAI_APIKEY"),
    "X-Cohere-Api-Key": os.getenv("COHERE_APIKEY"),
}
client = weaviate.connect_to_local(headers=api_key_headers)
# Start from a clean slate: delete both collections if they already exist
client.collections.delete(["Movie", "ReviewMT"])

# Create the ReviewMT collection (multi-tenancy enabled)
review_schema = [
    wc.Property(name="movie_id", data_type=wc.DataType.INT),
    wc.Property(
        name="review_id",
        data_type=wc.DataType.TEXT,
        skip_vectorization=True,
    ),
    wc.Property(name="content", data_type=wc.DataType.TEXT),
]
reviews = client.collections.create(
    name="ReviewMT",
    properties=review_schema,
    vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(),
    multi_tenancy_config=wc.Configure.multi_tenancy(enabled=True),
)

# Register the two tenants that the sample reviews are inserted under
review_tenants = [Tenant(name=tenant_name) for tenant_name in ("Tenant0", "Tenant1")]
reviews.tenants.create(review_tenants)
# Create the Movie collection. It has no multi-tenancy config, yet declares a
# cross-reference property ("hasReview") that targets the ReviewMT collection.
movie_schema = [
    wc.Property(name="title", data_type=wc.DataType.TEXT),
    wc.Property(name="tmdb_id", data_type=wc.DataType.INT),
]
movie_references = [
    wc.ReferenceProperty(name="hasReview", target_collection="ReviewMT"),
]
movies = client.collections.create(
    name="Movie",
    properties=movie_schema,
    references=movie_references,
    vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(),
)
# Sample movie data: one dict per movie, keyed by "title" and "tmdb_id"
movies_data = [
    {"title": title, "tmdb_id": tmdb_id}
    for title, tmdb_id in (("Movie 1", 1), ("Movie 2", 2))
]
# Sample review data: one dict per review; "tenant" names the tenant the
# review is inserted under, and "movie_id" matches a movie's "tmdb_id".
_REVIEW_FIELDS = ("movie_id", "review_id", "content", "tenant")
reviews_data = [
    dict(zip(_REVIEW_FIELDS, row))
    for row in (
        (1, "review1", "Great movie!", "Tenant0"),
        (2, "review2", "Not bad.", "Tenant1"),
    )
]
# Insert every sample review into the tenant named by its "tenant" field.
for review in reviews_data:
    # UUID is derived from review_id, so the movie inserts further down can
    # recompute the same UUID when they build their cross-references.
    review_uuid = generate_uuid5(review["review_id"])
    reviews_t = reviews.with_tenant(review["tenant"])
    # "tenant" only selects the tenant above; it is not one of the properties
    # declared on ReviewMT, so it is left out of the stored object.
    stored_properties = {
        key: value for key, value in review.items() if key != "tenant"
    }
    reviews_t.data.insert(
        properties=stored_properties,
        uuid=review_uuid,
    )
# Adding movies
# Insert 1: This fails (as it should)
# Single-object inserts that carry a "hasReview" reference to a ReviewMT
# object. The first failing insert aborts the loop.
try:
    for movie, review in zip(movies_data, reviews_data):
        movie_uuid = generate_uuid5(str(movie["tmdb_id"]))
        movies.data.insert(
            properties=movie,
            uuid=movie_uuid,
            references={"hasReview": generate_uuid5(review["review_id"])},
        )
except Exception as exc:
    # Catch Exception rather than using a bare `except:` so Ctrl-C still
    # works, and show the error instead of discarding it.
    print(f"Insert 1 failed: {exc}")
    # Report how many Movie objects exist after the failed attempt
    response = movies.aggregate.over_all(total_count=True)
    print(response.total_count)
# Insert 2: This fails (as it should)
# Batch import where each object carries its "hasReview" reference inline.
with movies.batch.dynamic() as batch:
    for movie, review in zip(movies_data, reviews_data):
        movie_uuid = generate_uuid5(str(movie["tmdb_id"]))
        batch.add_object(
            properties=movie,
            uuid=movie_uuid,
            references={"hasReview": generate_uuid5(review["review_id"])},
        )
# Read the failure list only after the `with` block has exited
print(len(movies.batch.failed_objects))
# Insert 3: But this works
# Batch import where the object is added without references and the
# "hasReview" reference is added separately via batch.add_reference.
with movies.batch.dynamic() as batch:
    for movie, review in zip(movies_data, reviews_data):
        movie_uuid = generate_uuid5(str(movie["tmdb_id"]))
        batch.add_object(
            properties=movie,
            uuid=movie_uuid,
        )
        batch.add_reference(
            from_uuid=movie_uuid,
            from_property="hasReview",
            to=generate_uuid5(review["review_id"]),
        )
# Read both failure lists only after the `with` block has exited
print(len(movies.batch.failed_objects))
print(len(movies.batch.failed_references))
# Close the client connection now that the script is finished
client.close()
When querying a single-tenant collection that has cross-references to a multi-tenant (MT) collection, the query fails if a filter is specified.
For example, this throws
With:
But commenting out the `filters` line makes it work again. (Full collection creation / import script available if needed.)