run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License
35.8k stars 5.07k forks source link

[Question]: Sub question and router query engine #16255

Open navaneethstabilix opened 3 days ago

navaneethstabilix commented 3 days ago

Question Validation

Question

How can I use the sub-question query engine and the router query engine together in a single script?

dosubot[bot] commented 3 days ago

To use a sub-question query engine and a router query engine in a single script, you can follow the steps outlined below:

import os
import logging
import sys
from IPython.display import display, HTML
import nest_asyncio

# Install necessary packages
!pip install llama-index

# Apply nest_asyncio for Jupyter notebook
nest_asyncio.apply()

# Set up logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = []
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
logger.addHandler(handler)

# Set up OpenAI API key
os.environ["OPENAI_API_KEY"] = "sk-..."

from llama_index.core import (
    VectorStoreIndex,
    SummaryIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
)
from llama_index.core.text_splitter import SentenceSplitter
from llama_index.core.tools.query_engine import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors.pydantic_selectors import PydanticSingleSelector, PydanticMultiSelector
from llama_index.core.query_engine.sub_question_query_engine import SubQuestionQueryEngine

# Download data
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

# Load data
documents = SimpleDirectoryReader("data/paul_graham").load_data()

# Create nodes
parser = SentenceSplitter(chunk_size=1024, chunk_overlap=100)
nodes = parser(documents)

# Create indices
summary_index = SummaryIndex(nodes)
vector_index = VectorStoreIndex(nodes)

# Define query engines
summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", use_async=True)
vector_query_engine = vector_index.as_query_engine()

# Build tools
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    description="Useful for summarization questions related to Paul Graham essay on What I Worked On.",
)
vector_tool = QueryEngineTool.from_defaults(
    query_engine=vector_query_engine,
    description="Useful for retrieving specific context from Paul Graham essay on What I Worked On.",
)

# Create Router Query Engine
router_query_engine = RouterQueryEngine(
    selector=PydanticSingleSelector.from_defaults(),
    query_engine_tools=[summary_tool, vector_tool],
)

# Download additional data for SubQuestion Query Engine
!mkdir -p 'data/10k/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/lyft_2021.pdf' -O 'data/10k/lyft_2021.pdf'

# Load additional data
lyft_docs = SimpleDirectoryReader(input_files=["./data/10k/lyft_2021.pdf"]).load_data()
uber_docs = SimpleDirectoryReader(input_files=["./data/10k/uber_2021.pdf"]).load_data()

# Create additional indices
lyft_index = VectorStoreIndex.from_documents(lyft_docs)
uber_index = VectorStoreIndex.from_documents(uber_docs)

# Define additional query engines
lyft_engine = lyft_index.as_query_engine(similarity_top_k=3)
uber_engine = uber_index.as_query_engine(similarity_top_k=3)

# Define additional tools
query_engine_tools = [
    QueryEngineTool(
        query_engine=lyft_engine,
        metadata=ToolMetadata(
            name="lyft_10k",
            description="Provides information about Lyft financials for year 2021",
        ),
    ),
    QueryEngineTool(
        query_engine=uber_engine,
        metadata=ToolMetadata(
            name="uber_10k",
            description="Provides information about Uber financials for year 2021",
        ),
    ),
]

# Create SubQuestion Query Engine
sub_question_query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools
)

# Query using Router Query Engine
response = router_query_engine.query("What is the summary of the document?")
display(HTML(f'<p style="font-size:20px">{response.response}</p>'))

# Query using SubQuestion Query Engine
response = await sub_question_query_engine.aquery(
    "Compare revenue growth of Uber and Lyft from 2020 to 2021"
)
display(HTML(f'<p style="font-size:20px">{response.response}</p>'))

This code demonstrates how to set up and use both the Router Query Engine and the SubQuestion Query Engine in a single script [1].

To continue talking to Dosu, mention @dosu.