fastrepl / canary

Canary provides "Ask AI" for any developer-facing product. Open-source and self-hostable. 🐤🐦🐧
https://docs.getcanary.dev
MIT License
3 stars 0 forks source link

Create HNSW index #18

Closed yujonglee closed 6 days ago

yujonglee commented 1 week ago

We are missing an HNSW index. We need a way to set the dimension size in the DB before proceeding.

yujonglee commented 1 week ago
defmodule Canary.Sources.Document.Migration do
  use Ecto.Migration

  # Name passed as `index_name` when creating the BM25 index.
  @index_name "search_index"
  # Table that both the HNSW and BM25 indexes are built on.
  @table_name "source_documents"
  # Column indexed by the HNSW index.
  @table_vector_field "content_embedding"
  # Column passed as `key_field` when creating the BM25 index.
  @table_id_field "id"
  # Operator class applied to the vector column in the HNSW index definition.
  @distance_metric "vector_cosine_ops"

  # Forward migration: creates the HNSW index first, then the BM25 index.
  def up() do
    hnsw_up()
    bm25_up()
  end

  # Rollback: removes the indexes in the reverse order of `up/0`
  # (BM25 first, then HNSW), so the teardown mirrors the setup.
  def down do
    bm25_down()
    hnsw_down()
  end

  # Creates the HNSW index on the vector column using the configured
  # operator class.
  #
  # The index is named explicitly as `<table>_<column>_idx` instead of relying
  # on PostgreSQL's auto-generated name, so a rollback can refer to it
  # deterministically. This is the same name PostgreSQL picks by default when
  # there is no naming collision, so already-migrated databases are unaffected.
  defp hnsw_up() do
    execute("""
    CREATE INDEX #{@table_name}_#{@table_vector_field}_idx ON #{@table_name}
    USING hnsw (#{@table_vector_field} #{@distance_metric});
    """)
  end

  # Drops the HNSW index on the vector column.
  #
  # The target is the index name `<table>_<column>_idx` (PostgreSQL's default
  # name for a single-column index), not the table name: `DROP INDEX` takes an
  # index name, and no index is named after the table itself.
  defp hnsw_down() do
    execute("""
    DROP INDEX #{@table_name}_#{@table_vector_field}_idx;
    """)
  end

  # Creates the BM25 index through `paradedb.create_bm25`, indexing the
  # `content` field with an ngram tokenizer (4..6 grams, prefix only).
  defp bm25_up() do
    # JSON description of the text fields, embedded in the SQL as a string literal.
    text_fields =
      Jason.encode!(%{
        content: %{
          tokenizer: %{type: "ngram", min_gram: 4, max_gram: 6, prefix_only: true}
        }
      })

    execute("""
    CALL paradedb.create_bm25(
      index_name => '#{@index_name}',
      table_name => '#{@table_name}',
      key_field => '#{@table_id_field}',
      text_fields => '#{text_fields}'
    );
    """)
  end

  # Drops the BM25 index created by `bm25_up/0`.
  #
  # `paradedb.drop_bm25` is given the index name (`@index_name`), which is what
  # `create_bm25` registered the index under, rather than the table name.
  defp bm25_down() do
    execute("""
    CALL paradedb.drop_bm25('#{@index_name}');
    """)
  end
yujonglee commented 6 days ago

10b822ad01a09d5a820a9e01268930c83a33e084