Create a free online Neo4j instance (e.g., Neo4j AuraDB) and save its connection URI, username, and password.
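The script below loads these credentials from a .env file via python-dotenv. A minimal .env might look like this (placeholder values shown; Neo4j Aura connection URIs typically use the neo4j+s:// scheme):
# .env (placeholders - substitute your own instance details)
NEO4J_URI=neo4j+s://<your-instance-id>.databases.neo4j.io
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=<your-generated-password>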
# Imports
import os
import asyncio
import aiohttp
import numpy as np
import nest_asyncio
from dotenv import load_dotenv
from lightrag import LightRAG, QueryParam
from lightrag.llm import openai_complete_if_cache
from lightrag.utils import EmbeddingFunc

# Load environment variables from a .env file if one exists
load_dotenv()

# Neo4j connection settings (LightRAG's Neo4JStorage reads these from the environment)
NEO4J_URI = os.getenv("NEO4J_URI", "")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
if not NEO4J_PASSWORD:
    raise ValueError("NEO4J_PASSWORD environment variable must be set")

# Allow nested event loops for Jupyter or environments that require it
nest_asyncio.apply()

WORKING_DIR = "neo4j-local"
os.makedirs(WORKING_DIR, exist_ok=True)
# Define the LLM model function (completions go through an OpenAI-compatible endpoint)
async def llm_model_func(
    prompt, system_prompt=None, history_messages=[], **kwargs
) -> str:
    return await openai_complete_if_cache(
        "meta/llama-3.1-8b-instruct",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key="ollama",
        base_url="http://xxx:yyy/v1",
        **kwargs,
    )
# Fetch embeddings from an OpenAI-compatible embeddings endpoint
async def fetch_embeddings_from_server(
    texts: list[str],
    model: str = "nvidia/nv-embedqa-e5-v5",
    base_url: str = "http://xxx:yyy/v1/embeddings",
    api_key: str | None = None,
    input_type: str = "passage",
    encoding_format: str = "float",
    user: str = "user-identifier",
    truncate: str = "END",
) -> np.ndarray:
    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
    }
    # Attach the API key as a bearer token only if one was provided
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    payload = {
        "input": texts,
        "model": model,
        "input_type": input_type,
        "encoding_format": encoding_format,
        "user": user,
        "truncate": truncate,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(base_url, headers=headers, json=payload) as response:
            response.raise_for_status()
            response_data = await response.json()
    if "data" not in response_data:
        raise ValueError(f"Unexpected response format: {response_data}")
    embeddings = [item["embedding"] for item in response_data["data"]]
    return np.array(embeddings)
# Probe the embedding endpoint once to determine the embedding dimension
async def get_embedding_dim():
    test_text = ["This is a test sentence."]
    embedding = await fetch_embeddings_from_server(test_text)
    embedding_dim = embedding.shape[1]
    print(f"Embedding Dimensions: {embedding.shape}")
    return embedding_dim
# Set up LightRAG and run the example queries
async def main():
    embedding_dimension = await get_embedding_dim()
    # Initialize the LightRAG instance with Neo4j as the graph storage backend
    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        embedding_func=EmbeddingFunc(
            embedding_dim=embedding_dimension,
            max_token_size=8192,
            func=fetch_embeddings_from_server,
        ),
        kg="Neo4JStorage",
        log_level="DEBUG",
    )
    try:
        # Load text into the knowledge graph (ainsert is the async variant of insert)
        with open("./book.txt") as f:
            await rag.ainsert(f.read())
        # Perform queries with the different search modes
        for mode in ["naive", "local", "global", "hybrid"]:
            try:
                result = await rag.aquery(
                    "What are the top themes in this story?",
                    param=QueryParam(mode=mode),
                )
                print(f"\nResults for {mode} mode:")
                print(result)
            except Exception as e:
                print(f"Query failed for {mode} mode: {str(e)}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
# Run the main async function
asyncio.run(main())
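Before running the full script, it can help to verify the Neo4j credentials independently of LightRAG. The following is a minimal standalone sketch using the official neo4j Python driver (assumed installed; check_neo4j_connection is a hypothetical helper, not part of LightRAG):
# Standalone sanity check for the Neo4j connection (hypothetical helper)
import asyncio
import os
from neo4j import AsyncGraphDatabase

async def check_neo4j_connection():
    # verify_connectivity() acquires the routing table for neo4j:// URIs,
    # so a bad scheme, host, or network path tends to surface the same
    # "Unable to retrieve routing information" error seen in the output below
    driver = AsyncGraphDatabase.driver(
        os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
    )
    try:
        await driver.verify_connectivity()
        print("Neo4j connection OK")
    finally:
        await driver.close()

asyncio.run(check_neo4j_connection())
If this check fails against an Aura instance, the URI scheme (for example neo4j+s:// rather than bolt:// or neo4j://) is a common culprit.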
Output:
(rag) root@labserver04:~/tharun/Light-RAG# python3 neon.py
Embedding Dimensions: (1, 1024)
INFO:lightrag:Logger initialized for working directory: neo4j-local
DEBUG:lightrag:LightRAG init with param:
working_dir = neo4j-local,
kg = Neo4JStorage,
log_level = DEBUG,
chunk_token_size = 1200,
chunk_overlap_token_size = 100,
tiktoken_model_name = gpt-4o-mini,
entity_extract_max_gleaning = 1,
entity_summary_to_max_tokens = 500,
node_embedding_algorithm = node2vec,
node2vec_params = {'dimensions': 1536, 'num_walks': 10, 'walk_length': 40, 'window_size': 2, 'iterations': 3, 'random_seed': 3},
embedding_func = {'embedding_dim': 1024, 'max_token_size': 8192, 'func': <function fetch_embeddings_from_server at 0x7fc6bbcea680>},
embedding_batch_num = 32,
embedding_func_max_async = 16,
llm_model_func = <function llm_model_func at 0x7fc6bbcea710>,
llm_model_name = meta-llama/Llama-3.2-1B-Instruct,
llm_model_max_token_size = 32768,
llm_model_max_async = 16,
llm_model_kwargs = {},
key_string_value_json_storage_cls = <class 'lightrag.storage.JsonKVStorage'>,
vector_db_storage_cls = <class 'lightrag.storage.NanoVectorDBStorage'>,
vector_db_storage_cls_kwargs = {},
enable_llm_cache = True,
addon_params = {},
convert_response_to_json_func = <function convert_response_to_json at 0x7fc6c9742d40>
INFO:lightrag:Load KV full_docs with 0 data
INFO:lightrag:Load KV text_chunks with 0 data
INFO:lightrag:Load KV llm_response_cache with 84 data
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': 'neo4j-local/vdb_entities.json'} 0 data
INFO:nano-vectordb:Load (0, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': 'neo4j-local/vdb_relationships.json'} 0 data
INFO:nano-vectordb:Load (42, 1024) data
INFO:nano-vectordb:Init {'embedding_dim': 1024, 'metric': 'cosine', 'storage_file': 'neo4j-local/vdb_chunks.json'} 42 data
INFO:lightrag:[New Docs] inserting 1 docs
INFO:lightrag:[New Chunks] inserting 42 chunks
INFO:lightrag:Inserting 42 vectors to chunks
INFO:lightrag:[Entity Extraction]...
⠹ Processed 42 chunks, 689 entities(duplicated), 427 relations(duplicated)
ERROR:neo4j.pool:Unable to retrieve routing information
KG successfully indexed.
An error occurred: Unable to retrieve routing information
I am trying out LightRAG's Neo4j storage support, as announced in the README:
[2024.11.04]🎯📢You can use Neo4J for Storage now.
The steps and code above reproduce the error shown in the output.