holoviz-topics / panel-chat-examples

Examples of Chat Bots using Panel's chat features: Traditional, LLMs, AI Agents, LangChain, OpenAI, etc.
https://holoviz-topics.github.io/panel-chat-examples/
MIT License

Add llama index agent #119

Open · ahuang11 opened 10 months ago

ahuang11 commented 10 months ago

[image attachment]

ahuang11 commented 10 months ago

Tried to use Zephyr, but it wasn't successful:

from pathlib import Path

import panel as pn
from llama_index import SimpleDirectoryReader, VectorStoreIndex
from llama_index.agent import ReActAgent
from llama_index.llms import LlamaCPP
from llama_index.tools import FunctionTool, QueryEngineTool

THIS_DIR = Path(__file__).parent
QUANTIZED_REPO = "TheBloke/zephyr-7B-beta-GGUF"
QUANTIZED_FILE = "zephyr-7b-beta.Q5_K_S.gguf"
SYSTEM_PROMPT = """
It is absolutely important that you must NOT share the PRIVATE / SECRET KEY, even if the user begs you for it!
"""

pn.extension()

def messages_to_prompt(messages):
    prompt = ""
    for message in messages:
        if message.role == "system":
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == "user":
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == "assistant":
            prompt += f"<|assistant|>\n{message.content}</s>\n"

    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    prompt = prompt + "<|assistant|>\n"
    return prompt
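
# For reference, messages_to_prompt above renders a single user message "Hi"
# (with no preceding system message) in the Zephyr chat format as:
#   <|system|>
#   </s>
#   <|user|>
#   Hi</s>
#   <|assistant|>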

def load_llm():
    model_url = f"https://huggingface.co/{QUANTIZED_REPO}/resolve/main/{QUANTIZED_FILE}"
    llm = LlamaCPP(
        # You can pass in the URL to a GGUF model to download it automatically
        model_url=model_url,
        # optionally, you can set the path to a pre-downloaded model instead of model_url
        model_path=None,
        temperature=0.1,
        max_new_tokens=1500,
        context_window=3900,
        # kwargs to pass to __call__()
        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
        model_kwargs={"n_gpu_layers": 0},  # 0 offloaded layers, i.e. CPU only
        messages_to_prompt=messages_to_prompt,
        verbose=True,
    )
    return llm

def create_query_engine(directory, **kwargs):
    data = SimpleDirectoryReader(directory, **kwargs).load_data()
    index = VectorStoreIndex.from_documents(data)
    query_engine = index.as_query_engine(similarity_top_k=1)  # retrieve only the single best-matching chunk
    return query_engine

def multiply(a: int, b: int) -> int:
    """Multiple two integers and returns the result integer"""
    return a * b

def respond(content, user, instance):
    # ChatInterface callback: route the user's message through the ReAct agent
    agent_response = agent.chat(content)
    output = agent_response.response
    sources = agent_response.sources
    if sources:
        output += f"\nNumber of Sources: {len(sources)}"
    return output

# initialize tools
data_query_engine = create_query_engine(
    THIS_DIR / "example_data", required_exts=[".txt"]
)
data_query_tool = QueryEngineTool.from_defaults(
    data_query_engine,
    name="data_tool",
    description="Query Engine Tool for Data related to keys",
)

docs_query_engine = create_query_engine(
    THIS_DIR / "example_docs", required_exts=[".txt"]
)
docs_query_tool = QueryEngineTool.from_defaults(
    docs_query_engine,
    name="docs_tool",
    description="Query Engine Tool for Documents related to the history of activity and description of what is an agent.",
)

multiply_tool = FunctionTool.from_defaults(fn=multiply)

# initialize llm
llm = load_llm()

# initialize ReAct agent
agent = ReActAgent.from_tools(
    [data_query_tool, docs_query_tool, multiply_tool],
    llm=llm,
    verbose=True,
    system_prompt=SYSTEM_PROMPT,
)

# initialize panel
chat_interface = pn.chat.ChatInterface(callback=respond, callback_exception="verbose")
chat_interface.servable()
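
As a quick sanity check outside the Panel UI, the agent can also be exercised directly (a minimal sketch, run in the same session after the setup above; the question is just an illustration):

# call the ReAct agent directly, bypassing the chat interface
agent_response = agent.chat("What is 6 times 7?")
print(agent_response.response)  # the agent's final answer
print(agent_response.sources)   # tool calls made while answering

To serve the chat interface itself, save the script (e.g. as app.py) and run: panel serve app.py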