# ------------------------------- Prepare Vector Database ----------------------
# Build a sample vector store from a blog post.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
# FIX: `langchain.embeddings` is a deprecated re-export path; import from
# langchain_community, consistent with the other imports in this file.
from langchain_community.embeddings import OpenAIEmbeddings
import os

# SECURITY NOTE: never commit a real API key. Using setdefault() keeps the
# placeholder from clobbering an OPENAI_API_KEY already set in the environment.
os.environ.setdefault("OPENAI_API_KEY", "Your Open AI KEY")

# Load the blog post that serves as the retrieval corpus.
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Split into 100-character chunks with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
splits = text_splitter.split_documents(data)

# Embed the chunks and index them in an in-memory Chroma vector store.
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(documents=splits, embedding=embedding)
# -------------------------------- QnA Part and Reordering ------------------------------------
# FIX: `langchain.chat_models` is a deprecated re-export path; import from
# langchain_community, consistent with the other imports in this file.
from langchain_community.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_transformers import (
    LongContextReorder,
)

# Chat model used to answer the question (library-default OpenAI chat model).
llm = ChatOpenAI()

# System prompt: answer only from the retrieved {context}; admit not knowing
# rather than guessing.
qa_system_prompt = """
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
{context}"""

# Two-message chat prompt: system instructions + the user's {question}.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("human", "{question}"),
    ]
)
def format_docs(docs):
    """Reorder retrieved documents and join their text into one context string.

    Applies LongContextReorder to the document sequence (intended to
    mitigate the "lost in the middle" effect — see the LangChain docs),
    then concatenates each document's page_content, separated by blank lines.
    """
    reorderer = LongContextReorder()
    return "\n\n".join(
        document.page_content
        for document in reorderer.transform_documents(docs)
    )
# Compose the RAG pipeline: retrieve -> reorder/format -> prompt -> LLM -> text.
rag_chain = (
    {"context": vectordb.as_retriever() | format_docs, "question": RunnablePassthrough()}
    | qa_prompt
    | llm
    | StrOutputParser()
)
# FIX: the original discarded the invoke() result; capture and print it so
# the script actually surfaces the answer it paid an API call for.
answer = rag_chain.invoke("What are the approaches to Task Decomposition?")
print(answer)
# Reference: https://python.langchain.com/v0.1/docs/modules/data_connection/retrievers/long_context_reorder/