Chainlit / chainlit

Build Conversational AI in minutes ⚡️
https://docs.chainlit.io
Apache License 2.0
6.85k stars 902 forks source link

Processing Multiple Documents to Chat With #915

Open Hiten-03 opened 5 months ago

Hiten-03 commented 5 months ago

Describe the bug I am working on a project where a user can chat with multiple documents which are uploaded by the user. For single document it works fine. But when I upload 2-3 documents, it only takes last document and give answers only related to the last document

Below is my code

@cl.on_chat_start
async def on_chat_start():

    files = await cl.AskFileMessage(
        content="Please upload a text file to begin!",
        accept=["text/plain","text/csv", "application/pdf"],
        max_size_mb=10,
        max_files=3,
        timeout=180,
    ).send()

    for file in files:
        msg = cl.Message(content=f"Processing `{file.name}`...", disable_feedback=True)
        await msg.send()

        if file.name.split('.')[1] == "pdf":
            elements = [
                cl.Pdf(name=file.name, display="inline", path=file.path)
            ]
            # Reminder: The name of the pdf must be in the content of the message
            await cl.Message(content="Here is your uploaded PDF!", elements=elements).send()
            print('its a file path',file.path)
            loader = PyPDFLoader(file.path)
            data = loader.load()
            # print('data - type',data, type(data),dir(data))
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=100,
                separators=["\n\n", "\n", " ", ""]
            )
            docs = text_splitter.split_documents(data)

        client = chromadb.EphemeralClient()
        client_settings = Settings(
            chroma_db_impl="duckdb+parquet",
            anonymized_telemetry=False,
            allow_reset=True,
        )

        collection_name = ''.join(random.choices(string.ascii_letters + string.digits, k=16))

        docsearch = await cl.make_async(Chroma.from_documents)(
            client=client,
            documents=docs,
            embedding=embeddings,
            collection_name = collection_name,
            client_settings=client_settings
        )

        message_history = ChatMessageHistory()

        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            chat_memory=message_history,
            return_messages=True,
        )

        PROMPT = PromptTemplate(template=prompt_template,
                                input_variables=["context", "question"])

        chain = ConversationalRetrievalChain.from_llm(
            llm= llm,
            chain_type="stuff",
            retriever=docsearch.as_retriever(),
            memory=memory,
            return_source_documents=True,
            combine_docs_chain_kwargs={"prompt": PROMPT},
        )

        await cl.Message(content="Hello, I am Smart Document Assistant. I am here to help you with any question you may have about the uploaded document."+"\n", disable_feedback=False).send()

        cl.user_session.set("chain", chain)

Expected behavior It should process multiple documents and should answer questions based on all documents uploaded

vivekjainmaiet commented 4 months ago

Any update on this ? Did you find any solution?