Status: Open — opened by navaneethstabilix 3 days ago
To use a sub-question query engine and a router query engine in a single script, you can follow the steps outlined below:
import os
import logging
import sys
from IPython.display import display, HTML
import nest_asyncio
# Install necessary packages
!pip install llama-index
# Apply nest_asyncio for Jupyter notebook
nest_asyncio.apply()
# Set up logging: send INFO-and-above records from the root logger to stdout.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Discard handlers already attached to the root logger (presumably so that
# re-running this cell does not print every record more than once).
logger.handlers = []
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
logger.addHandler(handler)
# Set up OpenAI API key.
# A key that is already exported in the environment takes precedence, so a
# valid key is never overwritten by the placeholder below. Replace "sk-..."
# with your own key only if you are not providing one via the environment.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = "sk-..."
from llama_index.core import (
VectorStoreIndex,
SummaryIndex,
SimpleDirectoryReader,
ServiceContext,
StorageContext,
)
from llama_index.core.text_splitter import SentenceSplitter
from llama_index.core.tools.query_engine import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors.pydantic_selectors import PydanticSingleSelector, PydanticMultiSelector
from llama_index.core.query_engine.sub_question_query_engine import SubQuestionQueryEngine
# Download data
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
# Load data
documents = SimpleDirectoryReader("data/paul_graham").load_data()
# Create nodes
parser = SentenceSplitter(chunk_size=1024, chunk_overlap=100)
nodes = parser(documents)
# Create indices
summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)
# Define query engines
summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", use_async=True)
vector_query_engine = vector_index.as_query_engine()
# Wrap both query engines as tools. Each description states what its engine
# is meant for (presumably what the selector reads when choosing a tool).
vector_tool = QueryEngineTool.from_defaults(
    description="Useful for retrieving specific context from Paul Graham essay on What I Worked On.",
    query_engine=vector_query_engine,
)
summary_tool = QueryEngineTool.from_defaults(
    description="Useful for summarization questions related to Paul Graham essay on What I Worked On.",
    query_engine=summary_query_engine,
)
# Router Query Engine: a single-choice selector picks one of the two tools
# for each incoming query.
router_query_engine = RouterQueryEngine(
    query_engine_tools=[summary_tool, vector_tool],
    selector=PydanticSingleSelector.from_defaults(),
)
# Download additional data for SubQuestion Query Engine
!mkdir -p 'data/10k/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -O 'data/10k/lyft_2021.pdf'
# Load additional data
lyft_docs = SimpleDirectoryReader(input_files=["./data/10k/lyft_2021.pdf"]).load_data()
uber_docs = SimpleDirectoryReader(input_files=["./data/10k/uber_2021.pdf"]).load_data()
# Create additional indices
lyft_index = VectorStoreIndex.from_documents(lyft_docs)
uber_index = VectorStoreIndex.from_documents(uber_docs)
# Define additional query engines
lyft_engine = lyft_index.as_query_engine(similarity_top_k=3)
uber_engine = uber_index.as_query_engine(similarity_top_k=3)
# One named tool per company, each wrapping that company's query engine.
lyft_tool = QueryEngineTool(
    query_engine=lyft_engine,
    metadata=ToolMetadata(
        name="lyft_10k",
        description="Provides information about Lyft financials for year 2021",
    ),
)
uber_tool = QueryEngineTool(
    query_engine=uber_engine,
    metadata=ToolMetadata(
        name="uber_10k",
        description="Provides information about Uber financials for year 2021",
    ),
)
query_engine_tools = [lyft_tool, uber_tool]
# SubQuestion Query Engine built over both company tools.
sub_question_query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
)
# Query using Router Query Engine
response = router_query_engine.query("What is the summary of the document?")
display(HTML(f'<p style="font-size:20px">{response.response}</p>'))
# Query using SubQuestion Query Engine
response = await sub_question_query_engine.aquery(
"Compare revenue growth of Uber and Lyft from 2020 to 2021"
)
display(HTML(f'<p style="font-size:20px">{response.response}</p>'))
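This code demonstrates how to set up and use both the Router Query Engine and the SubQuestion Query Engine in a single script [1]. Note that it must be run in a Jupyter/IPython notebook, because it relies on `!` shell commands and top-level `await`.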
To continue talking to Dosu, mention @dosu.
Question Validation
Question
How can I use a sub-question query engine and a router query engine in a single script?