BBC-Esq / VectorDB-Plugin-for-LM-Studio

Plugin that lets you use LM Studio to ask questions about your documents including audio and video files.
247 stars 34 forks source link

look into long context reordering #190

Closed BBC-Esq closed 5 days ago

BBC-Esq commented 2 weeks ago

https://python.langchain.com/v0.1/docs/modules/data_connection/retrievers/long_context_reorder/

#-------------------------------Prepare Vector Database----------------------
# Build a sample vectorDB
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
import os

os.environ["OPENAI_API_KEY"] = "Your Open AI KEY"

# Load blog post
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
splits = text_splitter.split_documents(data)

# VectorDB
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(documents=splits, embedding=embedding)

#--------------------------------QnA Part and Reordering------------------------------------
from langchain.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_transformers import (
    LongContextReorder,
)

llm = ChatOpenAI()

qa_system_prompt = """
        Use the following pieces of retrieved context to answer the question. \
        If you don't know the answer, just say that you don't know. \

        {context}"""

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("human", "{question}"),
    ]
)

def format_docs(docs):
    #Called the reordering function in here
    reordering = LongContextReorder()
    reordered_docs = reordering.transform_documents(docs)
    doc_strings = [doc.page_content for doc in reordered_docs]
    return "\n\n".join(doc_strings)

rag_chain = (
    {"context": vectordb.as_retriever() | format_docs, "question": RunnablePassthrough()}
    | qa_prompt
    | llm
    | StrOutputParser()
)
rag_chain.invoke("What are the approaches to Task Decomposition?")