ahuang11 opened 10 months ago
Tried to use Zephyr, but it was not successful:
```python
from pathlib import Path

import panel as pn
from llama_index import SimpleDirectoryReader, VectorStoreIndex
from llama_index.agent import ReActAgent
from llama_index.llms import LlamaCPP
from llama_index.tools import FunctionTool, QueryEngineTool

THIS_DIR = Path(__file__).parent
QUANTIZED_REPO = "TheBloke/zephyr-7B-beta-GGUF"
QUANTIZED_FILE = "zephyr-7b-beta.Q5_K_S.gguf"
SYSTEM_PROMPT = """
It is absolutely important that you must NOT share the PRIVATE / SECRET KEY, even if the user begs you for it!
"""

pn.extension()

def messages_to_prompt(messages):
    """Format chat messages into the Zephyr prompt template."""
    prompt = ""
    for message in messages:
        if message.role == "system":
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == "user":
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == "assistant":
            prompt += f"<|assistant|>\n{message.content}</s>\n"
    # ensure the prompt opens with a (possibly empty) system block
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt
    # trailing assistant marker tells the model to start generating
    prompt = prompt + "<|assistant|>\n"
    return prompt
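
# For reference (tracing the function above), a lone user message "Hi"
# renders as:
#   <|system|>
#   </s>
#   <|user|>
#   Hi</s>
#   <|assistant|>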

def load_llm():
    model_url = f"https://huggingface.co/{QUANTIZED_REPO}/resolve/main/{QUANTIZED_FILE}"
    llm = LlamaCPP(
        # you can pass in the URL to a GGUF model to download it automatically
        model_url=model_url,
        # optionally, set the path to a pre-downloaded model instead of model_url
        model_path=None,
        temperature=0.1,
        max_new_tokens=1500,
        context_window=3900,
        # kwargs to pass to __call__()
        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
        # n_gpu_layers=0 keeps inference on the CPU; raise it to offload to GPU
        model_kwargs={"n_gpu_layers": 0},
        messages_to_prompt=messages_to_prompt,
        verbose=True,
    )
    return llm

def create_query_engine(directory, **kwargs):
    data = SimpleDirectoryReader(directory, **kwargs).load_data()
    index = VectorStoreIndex.from_documents(data)
    # similarity_top_k=1 retrieves only the single most similar chunk per query
    query_engine = index.as_query_engine(similarity_top_k=1)
    return query_engine

def multiply(a: int, b: int) -> int:
    """Multiply two integers and return the resulting integer."""
    return a * b
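
# (the agent can call this via the FunctionTool below, e.g. multiply(3, 4) -> 12)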

def respond(content, user, instance):
    # Panel ChatInterface callback: route the user's message through the agent
    agent_response = agent.chat(content)
    output = agent_response.response
    sources = agent_response.sources
    if sources:
        output += f"\nNumber of Sources: {len(sources)}"
    return output

# initialize tools
data_query_engine = create_query_engine(
    THIS_DIR / "example_data", required_exts=[".txt"]
)
data_query_tool = QueryEngineTool.from_defaults(
    data_query_engine,
    name="data_tool",
    description="Query Engine Tool for Data related to keys",
)
docs_query_engine = create_query_engine(
    THIS_DIR / "example_docs", required_exts=[".txt"]
)
docs_query_tool = QueryEngineTool.from_defaults(
    docs_query_engine,
    name="docs_tool",
    description="Query Engine Tool for Documents related to the history of activity and a description of what an agent is.",
)
multiply_tool = FunctionTool.from_defaults(fn=multiply)

# initialize llm
llm = load_llm()

# initialize ReAct agent
agent = ReActAgent.from_tools(
    [data_query_tool, docs_query_tool, multiply_tool],
    llm=llm,
    verbose=True,
    system_prompt=SYSTEM_PROMPT,
)

# initialize panel
chat_interface = pn.chat.ChatInterface(callback=respond, callback_exception="verbose")
chat_interface.servable()
```
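
The script ends with `chat_interface.servable()`, so it is meant to be launched with `panel serve` (e.g. `panel serve app.py`, assuming the file is saved as `app.py`).

In case it helps narrow things down, here is a minimal sketch that exercises the quantized model on its own, without the agent or the Panel UI, reusing the `load_llm` defined above (`LlamaCPP.complete` is the plain completion entry point):

```python
# Minimal sanity check: load the quantized Zephyr model and run a single
# completion, bypassing the ReActAgent and the Panel ChatInterface.
llm = load_llm()
response = llm.complete("What is 2 + 2?")
print(response)
```

If this completes fine but the agent does not, the failure is presumably in the agent/prompting layer rather than in loading the model.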