QuivrHQ / quivr

Opiniated RAG for integrating GenAI in your apps 🧠 Focus on your product rather than the RAG. Easy integration in existing products with customisation! Any LLM: GPT4, Groq, Llama. Any Vectorstore: PGVector, Faiss. Any Files. Anyway you want.
https://core.quivr.com
Other
36.54k stars 3.56k forks source link

Optimize get_knowledge_tree #3233

Open linear[bot] opened 1 month ago

linear[bot] commented 1 month ago

Current generate query:

EXPLAIN ANALYZE
WITH RECURSIVE knowledge_tree(id, file_name, url, extension, status, SOURCE, source_link, file_size, file_sha1, created_at, updated_at, metadata, is_folder, user_id, parent_id, sync_id, sync_file_id) AS
  (SELECT knowledge_1.id AS id,
          knowledge_1.file_name AS file_name,
          knowledge_1.url AS url,
          knowledge_1.extension AS extension,
          knowledge_1.status AS status,
          knowledge_1.source AS SOURCE,
          knowledge_1.source_link AS source_link,
          knowledge_1.file_size AS file_size,
          knowledge_1.file_sha1 AS file_sha1,
          knowledge_1.created_at AS created_at,
          knowledge_1.updated_at AS updated_at,
          knowledge_1.metadata AS metadata,
          knowledge_1.is_folder AS is_folder,
          knowledge_1.user_id AS user_id,
          knowledge_1.parent_id AS parent_id,
          knowledge_1.sync_id AS sync_id,
          knowledge_1.sync_file_id AS sync_file_id
   FROM knowledge AS knowledge_1
   WHERE knowledge_1.parent_id = '40ba47d7-51b2-4b2a-9247-89e29619efb0'::UUID
   UNION ALL SELECT knowledge_2.id AS id,
                    knowledge_2.file_name AS file_name,
                    knowledge_2.url AS url,
                    knowledge_2.extension AS extension,
                    knowledge_2.status AS status,
                    knowledge_2.source AS SOURCE,
                    knowledge_2.source_link AS source_link,
                    knowledge_2.file_size AS file_size,
                    knowledge_2.file_sha1 AS file_sha1,
                    knowledge_2.created_at AS created_at,
                    knowledge_2.updated_at AS updated_at,
                    knowledge_2.metadata AS metadata,
                    knowledge_2.is_folder AS is_folder,
                    knowledge_2.user_id AS user_id,
                    knowledge_2.parent_id AS parent_id,
                    knowledge_2.sync_id AS sync_id,
                    knowledge_2.sync_file_id AS sync_file_id
   FROM knowledge AS knowledge_2
   JOIN knowledge_tree ON knowledge_2.parent_id = knowledge_tree.id)
SELECT knowledge.id,
       knowledge.file_name,
       knowledge.url,
       knowledge.extension,
       knowledge.status,
       knowledge.source,
       knowledge.source_link,
       knowledge.file_size,
       knowledge.file_sha1,
       knowledge.created_at,
       knowledge.updated_at,
       knowledge.metadata,
       knowledge.is_folder,
       knowledge.user_id,
       knowledge.parent_id,
       knowledge.sync_id,
       knowledge.sync_file_id,
       prompts_1.id AS id_1,
       prompts_1.content,
       prompts_1.title,
       prompts_1.status AS status_1,
       brains_1.brain_id,
       brains_1.name,
       brains_1.description,
       brains_1.status AS status_2,
       brains_1.model,
       brains_1.max_tokens,
       brains_1.temperature,
       brains_1.last_update,
       brains_1.brain_type,
       brains_1.prompt_id,
       syncs_1.id AS id_2,
       syncs_1.name AS name_1,
       syncs_1.provider,
       syncs_1.email,
       syncs_1.user_id AS user_id_1,
       syncs_1.credentials,
       syncs_1.state,
       syncs_1.created_at AS created_at_1,
       syncs_1.updated_at AS updated_at_1,
       syncs_1.last_synced_at,
       syncs_1.additional_data
FROM knowledge
JOIN knowledge_tree ON knowledge.id = knowledge_tree.id
LEFT OUTER JOIN (knowledge_brain AS knowledge_brain_1
                 JOIN brains AS brains_1 ON brains_1.brain_id = knowledge_brain_1.brain_id) ON knowledge.id = knowledge_brain_1.knowledge_id
LEFT OUTER JOIN prompts AS prompts_1 ON prompts_1.id = brains_1.prompt_id
LEFT OUTER JOIN syncs AS syncs_1 ON syncs_1.id = knowledge.sync_id

Should be simpler:

Previous implementation

WITH RECURSIVE knowledge_tree AS (
    SELECT *
    FROM knowledge
    WHERE parent_id = :parent_id
    UNION ALL
    SELECT k.*
    FROM knowledge k
    JOIN knowledge_tree kt ON k.parent_id = kt.id
)
SELECT * FROM knowledge_tree
linear[bot] commented 1 month ago

ENT-88 Optimize get_knowledge_tree