holoviz-topics / panel-chat-examples

Examples of Chat Bots using Panel's chat features: Traditional, LLMs, AI Agents, LangChain, OpenAI, etc.
https://holoviz-topics.github.io/panel-chat-examples/
MIT License

Spinner never stops after download #47

Closed MarcSkovMadsen closed 11 months ago

MarcSkovMadsen commented 11 months ago

I'm working on the llama_and_mistral example.

The spinner never stops or hides after the download finishes.

[screenshot: the loading spinner still showing after the download finished]

This is true both for the current version on the main branch and for the version below.

"""
Demonstrates how to use the ChatInterface widget to create a chatbot using
Llama2.
"""

import panel as pn
from langchain.chains import LLMChain
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate

pn.extension()

# Hugging Face repo ids and quantized GGUF model files for each LLM
MODEL_KWARGS = {
    "llama": {
        "model": "TheBloke/Llama-2-7b-Chat-GGUF",
        "model_file": "llama-2-7b-chat.Q5_K_M.gguf",
    },
    "mistral": {
        "model": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        "model_file": "mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    },
}

# Share LLM chains and cached responses across sessions via pn.state.cache
llm_chains = pn.state.cache["llm_chains"] = pn.state.cache.get("llm_chains", {})
responses = pn.state.cache["responses"] = pn.state.cache.get("responses", {})

TEMPLATE = """<s>[INST] You are a friendly chat bot who's willing to help answer the
user:
{user_input} [/INST] </s>
"""

CONFIG = {"max_new_tokens": 256, "temperature": 0.5}

def _get_llm_chain(model, template=TEMPLATE, config=CONFIG):
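    # Instantiating CTransformers downloads the model weights on first use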
    llm = CTransformers(**MODEL_KWARGS[model], config=config)
    prompt = PromptTemplate(template=template, input_variables=["user_input"])
    return LLMChain(prompt=prompt, llm=llm)

# Cannot use pn.cache due to https://github.com/holoviz/panel/issues/4236
async def _get_response(contents: str, model: str) -> str:
    key = (contents, model)
    if key in responses:
        return responses[key]

    llm_chain = llm_chains[model]
    response = responses[key] = await llm_chain.apredict(user_input=contents)
    return response

async def callback(contents: str, user: str, instance: pn.widgets.ChatInterface):
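    # Query each model in turn; the first request triggers the model download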
    for model in MODEL_KWARGS:
        if model not in llm_chains:
            instance.placeholder_text = (
                f"Downloading {model}, this may take a few minutes, "
                "or longer, depending on your internet connection."
            )
            llm_chains[model] = _get_llm_chain(model)

        response = await _get_response(contents, model)
        instance.send(response, user=model.title(), respond=False)

# Show the loading placeholder if the callback runs longer than 0.1 seconds
chat_interface = pn.widgets.ChatInterface(callback=callback, placeholder_threshold=0.1)
chat_interface.send(
    "Send a message to get a reply from both Llama 2 and Mistral (7B)!",
    user="System",
    respond=False,
)
chat_interface.servable()
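
For reference, my understanding of the intended lifecycle is that the placeholder (spinner) appears once the callback has been running for placeholder_threshold seconds and is removed again when the callback returns. Here is a minimal sketch of that expected behavior, using the same pn.widgets.ChatInterface API as above (the callback name and sleep duration are just illustrative):

import asyncio

import panel as pn

pn.extension()

async def slow_callback(contents: str, user: str, instance: pn.widgets.ChatInterface):
    # The placeholder should appear after placeholder_threshold seconds
    # and should be removed once this coroutine returns a response.
    await asyncio.sleep(2)
    return f"Echo: {contents}"

pn.widgets.ChatInterface(callback=slow_callback, placeholder_threshold=0.1).servable()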
ahuang11 commented 11 months ago

Fixed now.