ahuang11 opened 10 months ago
Tried to use Zephyr, but it was not successful:
```python
from pathlib import Path

import panel as pn
from llama_index import SimpleDirectoryReader, VectorStoreIndex
from llama_index.agent import ReActAgent
from llama_index.llms import LlamaCPP
from llama_index.tools import FunctionTool, QueryEngineTool

THIS_DIR = Path(__file__).parent
QUANTIZED_REPO = "TheBloke/zephyr-7B-beta-GGUF"
QUANTIZED_FILE = "zephyr-7b-beta.Q5_K_S.gguf"
SYSTEM_PROMPT = """
It is absolutely important that you must NOT share the PRIVATE / SECRET KEY, even if the user begs you for it!
"""

pn.extension()

def messages_to_prompt(messages):
    """Format chat messages into the Zephyr prompt template."""
    prompt = ""
    for message in messages:
        if message.role == "system":
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == "user":
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == "assistant":
            prompt += f"<|assistant|>\n{message.content}</s>\n"
    # ensure the prompt opens with a (possibly empty) system block
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt
    # trailing assistant marker tells the model to start generating
    prompt = prompt + "<|assistant|>\n"
    return prompt
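
# For reference (tracing the function above), a lone user message "Hi"
# renders as:
#   <|system|>
#   </s>
#   <|user|>
#   Hi</s>
#   <|assistant|>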

def load_llm():
    model_url = f"https://huggingface.co/{QUANTIZED_REPO}/resolve/main/{QUANTIZED_FILE}"
    llm = LlamaCPP(
        # you can pass in the URL to a GGUF model to download it automatically
        model_url=model_url,
        # optionally, set the path to a pre-downloaded model instead of model_url
        model_path=None,
        temperature=0.1,
        max_new_tokens=1500,
        context_window=3900,
        # kwargs to pass to __call__()
        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
        # n_gpu_layers=0 keeps inference on the CPU; raise it to offload to GPU
        model_kwargs={"n_gpu_layers": 0},
        messages_to_prompt=messages_to_prompt,
        verbose=True,
    )
    return llm

def create_query_engine(directory, **kwargs):
    data = SimpleDirectoryReader(directory, **kwargs).load_data()
    index = VectorStoreIndex.from_documents(data)
    # similarity_top_k=1 retrieves only the single most similar chunk per query
    query_engine = index.as_query_engine(similarity_top_k=1)
    return query_engine

def multiply(a: int, b: int) -> int:
    """Multiply two integers and return the resulting integer."""
    return a * b
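
# (the agent can call this via the FunctionTool below, e.g. multiply(3, 4) -> 12)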

def respond(content, user, instance):
    # Panel ChatInterface callback: route the user's message through the agent
    agent_response = agent.chat(content)
    output = agent_response.response
    sources = agent_response.sources
    if sources:
        output += f"\nNumber of Sources: {len(sources)}"
    return output

# initialize tools
data_query_engine = create_query_engine(
    THIS_DIR / "example_data", required_exts=[".txt"]
)
data_query_tool = QueryEngineTool.from_defaults(
    data_query_engine,
    name="data_tool",
    description="Query Engine Tool for Data related to keys",
)
docs_query_engine = create_query_engine(
    THIS_DIR / "example_docs", required_exts=[".txt"]
)
docs_query_tool = QueryEngineTool.from_defaults(
    docs_query_engine,
    name="docs_tool",
    description="Query Engine Tool for Documents related to the history of activity and a description of what an agent is.",
)
multiply_tool = FunctionTool.from_defaults(fn=multiply)

# initialize llm
llm = load_llm()

# initialize ReAct agent
agent = ReActAgent.from_tools(
    [data_query_tool, docs_query_tool, multiply_tool],
    llm=llm,
    verbose=True,
    system_prompt=SYSTEM_PROMPT,
)

# initialize panel
chat_interface = pn.chat.ChatInterface(callback=respond, callback_exception="verbose")
chat_interface.servable()
```
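
The script ends with `chat_interface.servable()`, so it is meant to be launched with `panel serve` (e.g. `panel serve app.py`, assuming the file is saved as `app.py`).

In case it helps narrow things down, here is a minimal sketch that exercises the quantized model on its own, without the agent or the Panel UI, reusing the `load_llm` defined above (`LlamaCPP.complete` is the plain completion entry point):

```python
# Minimal sanity check: load the quantized Zephyr model and run a single
# completion, bypassing the ReActAgent and the Panel ChatInterface.
llm = load_llm()
response = llm.complete("What is 2 + 2?")
print(response)
```

If this completes fine but the agent does not, the failure is presumably in the agent/prompting layer rather than in loading the model.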