[X] I have searched both the documentation and discord for an answer.
Question
Hello, I am wondering how can I can use embeddings for querying with this kg index in my script, I have used the include_embeddings=True and embedding_mode="hybrid" and then storing mt created index in a persistent storage. However, when I try to check if the embeddings are created and are present in my index, I run into problems, additionally, within my docstore in my persistent storage I can see the embeddings field being null, making me even more confused about if the embeddings are even being generated. Can anyone help me here?
from nebula3.gclient.net import ConnectionPool
from nebula3.Config import Config
from llama_index.core import (
VectorStoreIndex,
SimpleDirectoryReader,
KnowledgeGraphIndex,
Settings,
StorageContext,
PromptTemplate,
load_index_from_storage
)
from llama_index.core import Document
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.graph_stores.nebula import NebulaGraphStore
from llama_index.core.query_engine import KnowledgeGraphQueryEngine
from llama_index.core.retrievers import KnowledgeGraphRAGRetriever
from typing import List
from llama_index.core.schema import NodeWithScore, QueryBundle
import os
import json
import base64
import subprocess
# Configure OpenAI settings
Settings.llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.embed_model = embed_model
Settings.chunk_size = 512
# Environment variables for NebulaGraph connection
os.environ["NEBULA_USER"] = "root"
os.environ["NEBULA_PASSWORD"] = "nebula"
os.environ["NEBULA_ADDRESS"] = "127.0.0.1:9669"
# NebulaGraph store configuration
space_name = "embtest"
edge_types, rel_prop_names = ["relationship"], ["relationship"]
tags = ["entity"]
graph_store = NebulaGraphStore(
space_name=space_name,
edge_types=edge_types,
rel_prop_names=rel_prop_names,
tags=tags
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)
# Load documents
documents = SimpleDirectoryReader("./data/paul_graham").load_data()
# Convert document text to lowercase
for doc in documents:
doc.text = doc.text.lower()
# Generate embeddings and create KnowledgeGraphIndex
print("Generating embeddings and creating KnowledgeGraphIndex...")
kg_index = KnowledgeGraphIndex.from_documents(
documents,
storage_context=storage_context,
max_triplets_per_chunk=10,
space_name=space_name,
edge_types=edge_types,
rel_prop_names=rel_prop_names,
tags=tags,
max_knowledge_sequence=15,
include_embeddings=True,
)
# Debug: Print out embeddings during the indexing process
for doc in documents:
embedding = embed_model.embed(doc.text)
print(f"Document ID: {doc.id}")
print(f"Embedding: {embedding[:20]}") # Print first 20 elements of the embedding
# Persist the KnowledgeGraphIndex
kg_index.storage_context.persist(persist_dir='./storage_graph2')
print("KnowledgeGraphIndex created and persisted.")
# Load the persisted KnowledgeGraphIndex
print("Loading KnowledgeGraphIndex from persistent storage...")
kg_index = load_index_from_storage(storage_context=storage_context, persist_dir='./storage_graph2')
# Print out embeddings from the loaded index
print("Printing embeddings from the loaded index:")
nodes = kg_index.graph_store.get_nodes()
for node in nodes:
if hasattr(node, 'embedding'):
embedding = node.embedding
print(f"Node ID: {node.id}")
print(f"Embedding: {embedding[:20]}") # Print first 20 elements of the embedding
print("Loaded KnowledgeGraphIndex and printed embeddings.")
# Set up query engine using the as_query_engine method
query_engine = kg_index.as_query_engine(
include_text=True,
response_mode="tree_summarize",
embedding_mode="hybrid",
similarity_top_k=5,
)
# Execute a sample query
response = query_engine.query("What is Hacker news")
print(response)
Question Validation
Question
Hello, I am wondering how can I can use embeddings for querying with this kg index in my script, I have used the
include_embeddings=True
andembedding_mode="hybrid"
and then storing mt created index in a persistent storage. However, when I try to check if the embeddings are created and are present in my index, I run into problems, additionally, within my docstore in my persistent storage I can see the embeddings field being null, making me even more confused about if the embeddings are even being generated. Can anyone help me here?