Open · gamingl1n32 opened 2 weeks ago
Hey,
I have the following code:
```python
import os
import time

from transformers import AutoModel, AutoTokenizer

from lightrag import LightRAG, QueryParam
from lightrag.llm import hf_model_complete, hf_embedding
from lightrag.utils import EmbeddingFunc

WORKING_DIR = "./dickens"

if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=hf_model_complete,
    llm_model_name="RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4",
    # Use Hugging Face embedding function
    embedding_func=EmbeddingFunc(
        embedding_dim=384,
        max_token_size=5000,
        func=lambda texts: hf_embedding(
            texts,
            tokenizer=AutoTokenizer.from_pretrained(
                "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
            ),
            embed_model=AutoModel.from_pretrained(
                "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
            ),
        ),
    ),
)

# Raw string so the backslashes in the Windows path are not treated as escapes
with open(r"results\prompts\abstract_summary.txt", encoding="utf8") as f:
    rag.insert(f.read())

start_time = time.perf_counter()
query = "<MY-QUESTION>"
result = rag.query(query, param=QueryParam(mode="local"))
end_time = time.perf_counter()
elapsed_time = end_time - start_time

print(f"Execution time: {elapsed_time:.6f} seconds")
print(query)
print("Answer: ")
print(result)
```
Chunking one document of 3,483 words takes about 20 minutes, and answering some questions takes anywhere from 15 seconds to 1,400 seconds with the local query mode.
PC specs: RTX 4060, 16 GB RAM, i5-12500H.
Any ideas to speed this up? My goal is to get a correct answer in ~20 seconds max.
For the insert, you can refer to #212. For queries, our tests generally complete within 20 seconds.
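One thing worth checking in the snippet above, as a suggestion rather than a confirmed fix: the embedding lambda calls `AutoTokenizer.from_pretrained(...)` and `AutoModel.from_pretrained(...)` inside its body, so the embedding model is deserialized from disk on every embedding call during both insert and query. Loading both once and capturing them in the closure avoids that repeated overhead. A minimal sketch, using the same model name as above and only hoisting the loads:

```python
from transformers import AutoModel, AutoTokenizer

from lightrag.llm import hf_embedding
from lightrag.utils import EmbeddingFunc

EMBED_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# Load the tokenizer and model once, up front, instead of inside the lambda,
# so every embedding call reuses the already-loaded weights.
tokenizer = AutoTokenizer.from_pretrained(EMBED_MODEL)
embed_model = AutoModel.from_pretrained(EMBED_MODEL)

embedding_func = EmbeddingFunc(
    embedding_dim=384,
    max_token_size=5000,
    # The closure captures the preloaded objects; nothing is re-loaded per call
    func=lambda texts: hf_embedding(texts, tokenizer=tokenizer, embed_model=embed_model),
)
```

On the query side, if your LightRAG version exposes it, lowering `top_k` on `QueryParam(mode="local", top_k=...)` shrinks the retrieved context and therefore the LLM prompt, which is usually where most of the per-query latency goes.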