-- 1) create the table
-- Reviews table used by the reproduction steps below.
-- The review_embedding column read by the step-4 query is not declared here;
-- per step 3 it is generated afterwards.
CREATE TABLE imdb_reviews (
    id              SERIAL  PRIMARY KEY,      -- auto-incrementing surrogate key
    imdb_id         INTEGER NOT NULL UNIQUE,  -- identifier selected and returned by the step-4 query
    review          TEXT,                     -- nullable
    positive_review BOOLEAN                   -- nullable
);
-- 2) populate table
-- Insert template: %s is a placeholder, not SQL; presumably the client
-- replaces it with the row tuples before execution (confirm against the loader).
INSERT INTO imdb_reviews (
    imdb_id,
    review,
    positive_review
)
VALUES %s
-- 3) generate review_embedding column via the cloud
-- 4) Run the query
-- For up to 100000 rows of imdb_reviews, return the row's imdb_id together with
-- two arrays built from the 5 rows of imdb_reviews that sort first by
-- `<->` against the row's review_embedding.
SELECT
    forall.imdb_id,
    nearest_per_id.near_imdb_ids,
    nearest_per_id.imdb_dists
FROM (
    -- Outer row set. There is no ORDER BY here, so the query itself does not
    -- specify which rows the LIMIT keeps.
    SELECT
        imdb_id,
        review_embedding
    FROM imdb_reviews
    LIMIT 100000
) AS forall
INNER JOIN LATERAL (
    -- Evaluated once per outer row: aggregates the 5 selected rows into one
    -- array of ids and one array of distances.
    SELECT
        ARRAY_AGG(imdb_id) AS near_imdb_ids,
        ARRAY_AGG(imdb_dist) AS imdb_dists
    FROM (
        -- The outer row itself is not excluded from the candidates.
        SELECT
            t2.imdb_id,
            cos_dist(forall.review_embedding, t2.review_embedding) AS imdb_dist
        FROM imdb_reviews AS t2
        -- Rows are ranked with the `<->` operator, while the reported
        -- distance above is computed with cos_dist().
        ORDER BY forall.review_embedding <-> t2.review_embedding
        LIMIT 5
    ) AS __unused_name
) AS nearest_per_id
    ON TRUE
ORDER BY forall.imdb_id;
Expected: the query in step 4 fails with the error "Operator <-> can only be used inside of an index".
Observed: the query runs a sequential scan, with the following query plan:
Query:
Expected: the query in step 4 fails with the error
"Operator <-> can only be used inside of an index".
Observed: the query runs a sequential scan, with the following query plan:
Once the index is created, the plan looks like the following: