import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import hf_model_complete, hf_embedding
from lightrag.utils import EmbeddingFunc
from transformers import AutoModel, AutoTokenizer
WORKING_DIR = "./dickens"
if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
INFO:lightrag:Load KV full_docs with 0 data
INFO:lightrag:Load KV text_chunks with 0 data
INFO:lightrag:Load KV llm_response_cache with 0 data
INFO:lightrag:Loaded graph from ./dickens\graph_chunk_entity_relation.graphml with 0 nodes, 0 edges
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_entities.json'} 0 data
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_relationships.json'} 0 data
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_chunks.json'} 42 data
INFO:lightrag:Creating a new event loop in a sub-thread.
INFO:lightrag:[New Docs] inserting 1 docs
INFO:lightrag:[New Chunks] inserting 42 chunks
INFO:lightrag:Inserting 42 vectors to chunks
INFO:lightrag:Writing graph with 0 nodes, 0 edges
Traceback (most recent call last):
File "D:\RA_LLM\agentic_light_rag\light_rag.py", line 34, in
rag.insert(f.read())
File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\lightrag.py", line 164, in insert
return loop.run_until_complete(self.ainsert(string_or_strings))
File "D:\anaconda\envs\light_rag\lib\asyncio\base_events.py", line 649, in run_until_complete
return future.result()
File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\lightrag.py", line 208, in ainsert
await self.chunks_vdb.upsert(inserting_chunks)
File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\storage.py", line 98, in upsert
results = self._client.upsert(datas=list_data)
File "D:\anaconda\envs\light_rag\lib\site-packages\nano_vectordb\dbs.py", line 92, in upsert
self.__storage["matrix"][i] = update_d[f_VECTOR].astype(Float)
IndexError: index 0 is out of bounds for axis 0 with size 0
please help me!thank you very much!
code:
import os from lightrag import LightRAG, QueryParam from lightrag.llm import hf_model_complete, hf_embedding from lightrag.utils import EmbeddingFunc from transformers import AutoModel, AutoTokenizer
WORKING_DIR = "./dickens"
if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR)
rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=hf_model_complete, llm_model_name="meta-llama/Llama-3.1-8B-Instruct", embedding_func=EmbeddingFunc( embedding_dim=1024, max_token_size=5000, func=lambda texts: hf_embedding( texts, tokenizer=AutoTokenizer.from_pretrained( "sentence-transformers/all-MiniLM-L6-v2" ), embed_model=AutoModel.from_pretrained( "sentence-transformers/all-MiniLM-L6-v2" ), ), ), )
with open(r".\mock_data.txt", "r", encoding="utf-8") as f: rag.insert(f.read())
Perform naive search
print( rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) )
Perform local search
print( rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) )
Perform global search
print( rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) )
Perform hybrid search
print( rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) )
error: INFO:lightrag:Logger initialized for working directory: ./dickens DEBUG:lightrag:LightRAG init with param: working_dir = ./dickens, chunk_token_size = 1200, chunk_overlap_token_size = 100, tiktoken_model_name = gpt-4o-mini, entity_extract_max_gleaning = 1, entity_summary_to_max_tokens = 500, node_embedding_algorithm = node2vec, node2vec_params = {'dimensions': 1536, 'num_walks': 10, 'walk_length': 40, 'window_size': 2, 'iterations': 3, 'random_seed': 3}, embedding_func = {'embedding_dim': 1024, 'max_token_size': 5000, 'func': <function at 0x00000210A8B83E20>},
embedding_batch_num = 32,
embedding_func_max_async = 16,
llm_model_func = <function hf_model_complete at 0x00000210B764D2D0>,
llm_model_name = meta-llama/Llama-3.1-8B-Instruct,
llm_model_max_token_size = 32768,
llm_model_max_async = 16,
key_string_value_json_storage_cls = <class 'lightrag.storage.JsonKVStorage'>,
vector_db_storage_cls = <class 'lightrag.storage.NanoVectorDBStorage'>,
vector_db_storage_cls_kwargs = {},
graph_storage_cls = <class 'lightrag.storage.NetworkXStorage'>,
enable_llm_cache = True,
addon_params = {},
convert_response_to_json_func = <function convert_response_to_json at 0x00000210B762A5F0>
INFO:lightrag:Load KV full_docs with 0 data INFO:lightrag:Load KV text_chunks with 0 data INFO:lightrag:Load KV llm_response_cache with 0 data INFO:lightrag:Loaded graph from ./dickens\graph_chunk_entity_relation.graphml with 0 nodes, 0 edges INFO:nano-vectordb:Load (0, 1024) data INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_entities.json'} 0 data INFO:nano-vectordb:Load (0, 1024) data INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_relationships.json'} 0 data INFO:nano-vectordb:Load (0, 1024) data INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_chunks.json'} 42 data INFO:lightrag:Creating a new event loop in a sub-thread. INFO:lightrag:[New Docs] inserting 1 docs INFO:lightrag:[New Chunks] inserting 42 chunks INFO:lightrag:Inserting 42 vectors to chunks INFO:lightrag:Writing graph with 0 nodes, 0 edges Traceback (most recent call last): File "D:\RA_LLM\agentic_light_rag\light_rag.py", line 34, in
rag.insert(f.read())
File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\lightrag.py", line 164, in insert
return loop.run_until_complete(self.ainsert(string_or_strings))
File "D:\anaconda\envs\light_rag\lib\asyncio\base_events.py", line 649, in run_until_complete
return future.result()
File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\lightrag.py", line 208, in ainsert
await self.chunks_vdb.upsert(inserting_chunks)
File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\storage.py", line 98, in upsert
results = self._client.upsert(datas=list_data)
File "D:\anaconda\envs\light_rag\lib\site-packages\nano_vectordb\dbs.py", line 92, in upsert
self.__storage["matrix"][i] = update_d[f_VECTOR].astype(Float)
IndexError: index 0 is out of bounds for axis 0 with size 0
please help me!thank you very much!