Closed yujonglee closed 6 days ago
defmodule Canary.Sources.Document.Migration do
use Ecto.Migration
# Name given to the BM25 index when calling paradedb.create_bm25.
@index_name "search_index"
# Table on which both the HNSW index and the BM25 index are created.
@table_name "source_documents"
# Column covered by the HNSW index.
@table_vector_field "content_embedding"
# Column passed as key_field to paradedb.create_bm25.
@table_id_field "id"
# Operator class placed after the column in the HNSW CREATE INDEX statement.
@distance_metric "vector_cosine_ops"
# Applies the migration: creates the HNSW index first, then the BM25 index.
def up do
  hnsw_up()
  bm25_up()
end
# Rolls the migration back: removes the HNSW index, then the BM25 index.
def down do
  hnsw_down()
  bm25_down()
end
# Creates an HNSW index over the vector column of the documents table.
# The statement does not name the index, so the database picks the name.
defp hnsw_up do
  sql = """
  CREATE INDEX ON #{@table_name}
  USING hnsw (#{@table_vector_field} #{@distance_metric});
  """

  execute(sql)
end
# Drops the HNSW index created by hnsw_up/0.
#
# hnsw_up/0 runs CREATE INDEX without an explicit name, so PostgreSQL assigns
# its default "<table>_<column>_idx". DROP INDEX expects an index name; passing
# the table name (as this function previously did) fails because the table is
# not an index, which made the migration impossible to roll back.
defp hnsw_down do
  execute("""
  DROP INDEX #{@table_name}_#{@table_vector_field}_idx;
  """)
end
# Creates the ParadeDB BM25 index over the documents table, keyed by the id
# column, with the `content` field tokenized as 4-6 character prefix n-grams.
defp bm25_up do
  # Tokenizer settings are handed to create_bm25 as a JSON string literal.
  text_fields =
    Jason.encode!(%{
      content: %{
        tokenizer: %{type: "ngram", min_gram: 4, max_gram: 6, prefix_only: true}
      }
    })

  sql = """
  CALL paradedb.create_bm25(
  index_name => '#{@index_name}',
  table_name => '#{@table_name}',
  key_field => '#{@table_id_field}',
  text_fields => '#{text_fields}'
  );
  """

  execute(sql)
end
# Drops the BM25 index created by bm25_up/0.
#
# paradedb.drop_bm25 takes the index name that was given to create_bm25
# (@index_name), not the name of the indexed table; passing the table name
# would leave the index in place on rollback.
defp bm25_down do
  execute("""
  CALL paradedb.drop_bm25('#{@index_name}');
  """)
end
10b822ad01a09d5a820a9e01268930c83a33e084
We are missing the HNSW index. We need a way to set the vector dimension size in the database before proceeding.