Describe the bug
I am working on a project where a user can chat with multiple documents which are uploaded by the user. For single document it works fine. But when I upload 2-3 documents, it only takes last document and give answers only related to the last document
Below is my code
@cl.on_chat_start
async def on_chat_start():
files = await cl.AskFileMessage(
content="Please upload a text file to begin!",
accept=["text/plain","text/csv", "application/pdf"],
max_size_mb=10,
max_files=3,
timeout=180,
).send()
for file in files:
msg = cl.Message(content=f"Processing `{file.name}`...", disable_feedback=True)
await msg.send()
if file.name.split('.')[1] == "pdf":
elements = [
cl.Pdf(name=file.name, display="inline", path=file.path)
]
# Reminder: The name of the pdf must be in the content of the message
await cl.Message(content="Here is your uploaded PDF!", elements=elements).send()
print('its a file path',file.path)
loader = PyPDFLoader(file.path)
data = loader.load()
# print('data - type',data, type(data),dir(data))
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=100,
separators=["\n\n", "\n", " ", ""]
)
docs = text_splitter.split_documents(data)
client = chromadb.EphemeralClient()
client_settings = Settings(
chroma_db_impl="duckdb+parquet",
anonymized_telemetry=False,
allow_reset=True,
)
collection_name = ''.join(random.choices(string.ascii_letters + string.digits, k=16))
docsearch = await cl.make_async(Chroma.from_documents)(
client=client,
documents=docs,
embedding=embeddings,
collection_name = collection_name,
client_settings=client_settings
)
message_history = ChatMessageHistory()
memory = ConversationBufferMemory(
memory_key="chat_history",
output_key="answer",
chat_memory=message_history,
return_messages=True,
)
PROMPT = PromptTemplate(template=prompt_template,
input_variables=["context", "question"])
chain = ConversationalRetrievalChain.from_llm(
llm= llm,
chain_type="stuff",
retriever=docsearch.as_retriever(),
memory=memory,
return_source_documents=True,
combine_docs_chain_kwargs={"prompt": PROMPT},
)
await cl.Message(content="Hello, I am Smart Document Assistant. I am here to help you with any question you may have about the uploaded document."+"\n", disable_feedback=False).send()
cl.user_session.set("chain", chain)
Expected behavior
It should process multiple documents and should answer questions based on all documents uploaded
Describe the bug I am working on a project where a user can chat with multiple documents which are uploaded by the user. For single document it works fine. But when I upload 2-3 documents, it only takes last document and give answers only related to the last document
Below is my code
Expected behavior It should process multiple documents and should answer questions based on all documents uploaded