HKUDS / LightRAG

"LightRAG: Simple and Fast Retrieval-Augmented Generation"
https://arxiv.org/abs/2410.05779
MIT License
8.51k stars 1.01k forks source link

hf_demo HF demo running error #226

Open Z-oo883 opened 1 week ago

Z-oo883 commented 1 week ago

code:

import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import hf_model_complete, hf_embedding
from lightrag.utils import EmbeddingFunc
from transformers import AutoModel, AutoTokenizer

WORKING_DIR = "./dickens"

if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR)

rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=hf_model_complete, llm_model_name="meta-llama/Llama-3.1-8B-Instruct", embedding_func=EmbeddingFunc( embedding_dim=1024, max_token_size=5000, func=lambda texts: hf_embedding( texts, tokenizer=AutoTokenizer.from_pretrained( "sentence-transformers/all-MiniLM-L6-v2" ), embed_model=AutoModel.from_pretrained( "sentence-transformers/all-MiniLM-L6-v2" ), ), ), )

with open(r".\mock_data.txt", "r", encoding="utf-8") as f: rag.insert(f.read())

# Perform naive search

print( rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")) )

# Perform local search

print( rag.query("What are the top themes in this story?", param=QueryParam(mode="local")) )

# Perform global search

print( rag.query("What are the top themes in this story?", param=QueryParam(mode="global")) )

# Perform hybrid search

print( rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")) )

error: INFO:lightrag:Logger initialized for working directory: ./dickens DEBUG:lightrag:LightRAG init with param: working_dir = ./dickens, chunk_token_size = 1200, chunk_overlap_token_size = 100, tiktoken_model_name = gpt-4o-mini, entity_extract_max_gleaning = 1, entity_summary_to_max_tokens = 500, node_embedding_algorithm = node2vec, node2vec_params = {'dimensions': 1536, 'num_walks': 10, 'walk_length': 40, 'window_size': 2, 'iterations': 3, 'random_seed': 3}, embedding_func = {'embedding_dim': 1024, 'max_token_size': 5000, 'func': <function at 0x00000210A8B83E20>}, embedding_batch_num = 32, embedding_func_max_async = 16, llm_model_func = <function hf_model_complete at 0x00000210B764D2D0>, llm_model_name = meta-llama/Llama-3.1-8B-Instruct, llm_model_max_token_size = 32768, llm_model_max_async = 16, key_string_value_json_storage_cls = <class 'lightrag.storage.JsonKVStorage'>, vector_db_storage_cls = <class 'lightrag.storage.NanoVectorDBStorage'>, vector_db_storage_cls_kwargs = {}, graph_storage_cls = <class 'lightrag.storage.NetworkXStorage'>, enable_llm_cache = True, addon_params = {}, convert_response_to_json_func = <function convert_response_to_json at 0x00000210B762A5F0>

INFO:lightrag:Load KV full_docs with 0 data
INFO:lightrag:Load KV text_chunks with 0 data
INFO:lightrag:Load KV llm_response_cache with 0 data
INFO:lightrag:Loaded graph from ./dickens\graph_chunk_entity_relation.graphml with 0 nodes, 0 edges
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_entities.json'} 0 data
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_relationships.json'} 0 data
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': './dickens\vdb_chunks.json'} 42 data
INFO:lightrag:Creating a new event loop in a sub-thread.
INFO:lightrag:[New Docs] inserting 1 docs
INFO:lightrag:[New Chunks] inserting 42 chunks
INFO:lightrag:Inserting 42 vectors to chunks
INFO:lightrag:Writing graph with 0 nodes, 0 edges
Traceback (most recent call last):
  File "D:\RA_LLM\agentic_light_rag\light_rag.py", line 34, in <module>
    rag.insert(f.read())
  File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\lightrag.py", line 164, in insert
    return loop.run_until_complete(self.ainsert(string_or_strings))
  File "D:\anaconda\envs\light_rag\lib\asyncio\base_events.py", line 649, in run_until_complete
    return future.result()
  File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\lightrag.py", line 208, in ainsert
    await self.chunks_vdb.upsert(inserting_chunks)
  File "D:\anaconda\envs\light_rag\lib\site-packages\lightrag\storage.py", line 98, in upsert
    results = self._client.upsert(datas=list_data)
  File "D:\anaconda\envs\light_rag\lib\site-packages\nano_vectordb\dbs.py", line 92, in upsert
    self.__storage["matrix"][i] = update_d[f_VECTOR].astype(Float)
IndexError: index 0 is out of bounds for axis 0 with size 0

Please help me! Thank you very much!

hjing100 commented 5 days ago

https://github.com/HKUDS/LightRAG/issues/34