Pentar0o opened this issue 4 days ago
Since you are using agents, you need to use AgentConfigToolSearchToolDefinition in the agent_config, not SearchToolDefinition. Check out the AgentConfigToolSearchToolDefinition object in the codebase and use that. For future reference, whenever you use built-in tools in agent_config, use the AgentConfigTool definitions, as they are all already present in the code.
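Something like this, for reference (a sketch, not tested here; it assumes the AgentConfig and AgentConfigToolSearchToolDefinition TypedDicts exported from llama_stack_client.types.agent_create_params, with plain string literals for engine and type):

import os

from llama_stack_client.types.agent_create_params import (
    AgentConfig,
    AgentConfigToolSearchToolDefinition,
)

# engine/type are plain string literals on the TypedDict, not enum members,
# so the resulting config stays JSON serializable.
agent_config = AgentConfig(
    model="Llama3.2-3B-Instruct",
    instructions="You are a helpful assistant.",
    enable_session_persistence=False,
    tools=[
        AgentConfigToolSearchToolDefinition(
            type="brave_search",
            engine="brave",
            api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
        )
    ],
    tool_choice="auto",
)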
Thank you for the answer. I changed my code to:
agent_config = AgentConfig(
    model="Llama3.2-3B-Instruct",
    instructions="You are a helpful assistant and you answer in french concisely and precisely.",
    sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
    enable_session_persistence=False,
    tools=[
        AgentConfigToolSearchToolDefinition(
            engine="brave",
            api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
            type="brave_search",
        )
    ],
    tool_choice=ToolChoice.auto,
)
But I still have the following error: Erreur Create Agent : Object of type SearchEngineType is not JSON serializable. Any idea what I'm missing?
Can you send your stack trace and the full code, start to end?
Sure!
I have no errors in the stack trace:
(base) penta@0o-Legion:~$ llama stack run Cortana --disable-ipv6
Using config /home/penta/.llama/builds/conda/Cortana-run.yaml
Resolved 12 providers
inner-inference => remote::ollama
models => routing_table
inference => autorouted
inner-safety => meta-reference
shields => routing_table
safety => autorouted
inner-memory => meta-reference
memory_banks => routing_table
memory => autorouted
agents => meta-reference
telemetry => meta-reference
inspect => __builtin__
Initializing Ollama, checking connectivity to server...
Serving API memory_banks
 GET /memory_banks/get
 GET /memory_banks/list
 POST /memory_banks/register
Serving API safety
 POST /safety/run_shield
Serving API shields
 GET /shields/get
 GET /shields/list
 POST /shields/register
Serving API memory
 POST /memory/insert
 POST /memory/query
Serving API telemetry
 GET /telemetry/get_trace
 POST /telemetry/log_event
Serving API agents
 POST /agents/create
 POST /agents/session/create
 POST /agents/turn/create
 POST /agents/delete
 POST /agents/session/delete
 POST /agents/session/get
 POST /agents/step/get
 POST /agents/turn/get
Serving API inspect
 GET /health
 GET /providers/list
 GET /routes/list
Serving API inference
 POST /inference/chat_completion
 POST /inference/completion
 POST /inference/embeddings
Serving API models
 GET /models/get
 GET /models/list
 POST /models/register
Listening on 0.0.0.0:5000
INFO:     Started server process [3199]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:5000 (Press CTRL+C to quit)
And here is my code:

import warnings
from pathlib import Path
import tiktoken
from tiktoken.load import load_tiktoken_bpe
from termcolor import cprint, colored
import fire
import asyncio
import time
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig, AgentConfigToolSearchToolDefinition, AgentConfigTool
from llama_stack_client.lib.agents import *
from llama_stack.apis.agents.agents import *
from llama_stack.apis.agents.client import *
import os
from dotenv import load_dotenv
warnings.filterwarnings('ignore')
load_dotenv('fichier.env')
tokenizer_path = "tokenizer.model"
num_reserved_special_tokens = 256
mergeable_ranks = load_tiktoken_bpe(tokenizer_path)
special_tokens = [
    "<|begin_of_text|>",
    "<|end_of_text|>",
    "<|reserved_special_token_0|>",
    "<|reserved_special_token_1|>",
    "<|finetune_right_pad_id|>",
    "<|step_id|>",
    "<|start_header_id|>",
    "<|end_header_id|>",
    "<|eom_id|>",
    "<|eot_id|>",
    "<|python_tag|>",
]
reserved_tokens = [
    f"<|reserved_special_token_{2 + i}|>"
    for i in range(num_reserved_special_tokens - len(special_tokens))
]
special_tokens = special_tokens + reserved_tokens
tokenizer = tiktoken.Encoding(
    name=Path(tokenizer_path).name,
    pat_str=r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+",
    mergeable_ranks=mergeable_ranks,
    special_tokens={token: len(mergeable_ranks) + i for i, token in enumerate(special_tokens)},
)
colors = ["red", "green", "yellow", "blue", "magenta", "cyan", "white"]
def colorize_tokens(text):
    encoded_tokens = tokenizer.encode(text, allowed_special="all")
    colorized_text = ""
    # Assign each token a color from the list
    for i, token in enumerate(encoded_tokens):
        color = colors[i % len(colors)]  # Cycle through the list of colors
        token_text = tokenizer.decode([token])
        colorized_text += colored(token_text, color) + " "
    return colorized_text
def count_tokens(text):
    return len(tokenizer.encode(text, allowed_special="all"))
class Agent:
    def __init__(self, host, port):
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")

    def create_agent(self, agent_config: AgentConfig):
        agent = self.client.agents.create(
            agent_config=agent_config,
        )
        self.agent_id = agent.agent_id
        session = self.client.agents.session.create(
            agent_id=agent.agent_id,
            session_name="example_session",
        )
        self.session_id = session.session_id

    async def execute_turn(self, content: str):
        response = self.client.agents.turn.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[
                UserMessage(content=content, role="user"),
            ],
            stream=True,
        )
        for chunk in response:
            if chunk.event.payload.event_type != "turn_complete":
                yield chunk
web_search = AgentConfigToolSearchToolDefinition(
    engine="brave",
    api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
    type="brave_search",
)
async def run_main(host: str, port: int, stream: bool = True):
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[web_search],
        tool_choice=ToolChoice.auto,
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")

    total_tokens = 0

    while True:
        # User input
        cprint("Vous: ", "green", end="")
        question = input()  # User types their question
        if question.lower() == "bye":
            print("Fin de la session.")
            break

        # Tokenize and colorize the user's input
        cprint("Tokenized question:", "yellow")
        colorized_question = colorize_tokens(question)
        print(colorized_question)

        # Count tokens
        question_tokens = count_tokens(question)
        cprint(f"Nombre de tokens dans la question: {question_tokens}", "cyan")

        # Start measuring time
        start_time = time.time()

        # Variable to hold the collected response
        response_text = ""

        # Execute turn and await response
        try:
            response = agent.execute_turn(content=question)
        except Exception as erreur:
            print(f"Erreur pendant l'exécution du tour: {erreur}")

        async for log in EventLogger().log(response):
            if log is not None:
                log.print()
                response_text += str(log)

        # Time taken for inference
        end_time = time.time()
        time_taken = end_time - start_time
        cprint(f"Time taken for inference: {time_taken:.2f} seconds", "cyan")

        # Count response tokens
        response_tokens = count_tokens(response_text)
        cprint(f"Nombre de tokens dans la réponse: {response_tokens}", "cyan")

        # Total tokens check
        total_tokens_turn = question_tokens + response_tokens
        total_tokens += total_tokens_turn
        cprint(f"Nombre de Token de la session : {total_tokens}", "cyan")
        if total_tokens >= 4096:
            cprint("Attention: la limite de contexte de 4096 tokens est atteinte!", "red")
def main(host: str, port: int, stream: bool = True):
    asyncio.run(run_main(host, port, stream))
if __name__ == "__main__":
    fire.Fire(main)
Firstly, it's self.client.agents.sessions.create and self.client.agents.turns.create. You have written session and turn in their place, which could be causing an issue. Update that and let me know.
class Agent:
    def __init__(self, host, port):
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")
        self.agent_id = None
        self.session_id = None

    def create_agent(self, agent_config: AgentConfig):
        agent = self.client.agents.create(
            agent_config=agent_config,
        )
        self.agent_id = agent.agent_id
        session = self.client.agents.sessions.create(
            agent_id=agent.agent_id,
            session_name="example_session",
        )
        self.session_id = session.session_id

    def execute_turn(self, content: str):
        response = self.client.agents.turns.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[
                UserMessage(content=content, role="user"),
            ],
            stream=True,
        )
        return response
        # for chunk in response:
        #     if chunk.event.payload.event_type != "turn_complete":
        #         yield chunk
Still the same error with your modification: Erreur Create Agent : Object of type SearchEngineType is not JSON serializable
I am unable to solve this because I can't see where SearchEngineType is being used in this code
Here:

web_search = AgentConfigToolSearchToolDefinition(
    engine="brave",
    api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
    type="brave_search",
)

async def run_main(host: str, port: int, stream: bool = True):
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[web_search],
        tool_choice=ToolChoice.auto,
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")
You are using AgentConfigToolSearchToolDefinition and not SearchEngineType
class AgentConfigToolSearchToolDefinition(TypedDict, total=False):
    api_key: Required[str]
    engine: Required[Literal["bing", "brave"]]
    type: Required[Literal["brave_search"]]
    input_shields: List[str]
    output_shields: List[str]
    remote_execution: AgentConfigToolSearchToolDefinitionRemoteExecution
This is its definition. SearchEngineType is an Enum object.
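For what it's worth, the serialization failure itself can be reproduced with the standard library alone. This is a sketch using a hypothetical stand-in enum, not the actual SearchEngineType class:

import json
from enum import Enum

# Hypothetical stand-in for SearchEngineType, only to illustrate the failure mode.
class SearchEngineType(Enum):
    brave = "brave"
    bing = "bing"

# An enum member in the payload: json.dumps raises
# "Object of type SearchEngineType is not JSON serializable".
try:
    json.dumps({"engine": SearchEngineType.brave})
except TypeError as err:
    print(err)

# The plain string literal the TypedDict expects serializes fine.
print(json.dumps({"engine": "brave", "type": "brave_search"}))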
I've tried previously with SearchEngineType (check my first post) and I had the same error.
So with this code I don't have the error anymore, but I don't have the web search working either:

class Agent:
    def __init__(self, host, port):
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")
        self.agent_id = None
        self.session_id = None
    def create_agent(self, agent_config: AgentConfig):
        agent = self.client.agents.create(
            agent_config=agent_config,
        )
        self.agent_id = agent.agent_id

        # Generate a unique session ID using uuid4
        unique_session_name = str(uuid.uuid4())

        # Use the unique session name in your code
        session = self.client.agents.session.create(
            agent_id=agent.agent_id,
            session_name=unique_session_name,
        )
        self.session_id = session.session_id

    def execute_turn(self, content: str):
        response = self.client.agents.turn.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[
                UserMessage(content=content, role="user"),
            ],
            stream=True,
        )
        return response
sampling_params = SamplingParams(
    strategy="greedy",
    temperature=0.0,
    top_p=0.9,
    max_tokens=256,
)

websearch = [
    AgentConfigToolSearchToolDefinition(
        type="brave_search",
        engine="brave",
        api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
    )
]

async def run_main(host: str, port: int, stream: bool = True):
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=sampling_params,
        enable_session_persistence=False,
        tools=websearch,
        tool_choice="auto",
        tool_prompt_format="function_tag",
    )
    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")

    total_tokens = 0

    while True:
        # User input
        cprint("Vous: ", "green", end="")
        question = input()  # User types their question
        if question.lower() == "bye":
            print("Fin de la session.")
            break

        # Tokenize and colorize the user's input
        cprint("Tokenized question:", "yellow")
        colorized_question = colorize_tokens(question)
        print(colorized_question)

        # Count tokens
        question_tokens = count_tokens(question)
        cprint(f"Nombre de tokens dans la question: {question_tokens}", "cyan")

        # Start measuring time
        start_time = time.time()

        # Variable to hold the collected response
        response_text = ""

        # Execute turn and await response
        try:
            response = agent.execute_turn(content=question)
        except Exception as erreur:
            print(f"Erreur pendant l'exécution du tour: {erreur}")

        for chunk in response:
            if chunk.event.payload.event_type != "turn_complete":
                if hasattr(chunk.event.payload, 'text_delta_model_response'):
                    response_text += chunk.event.payload.text_delta_model_response
                    print(chunk.event.payload.text_delta_model_response, end='', flush=True)
Any help appreciated.
Hi,
I'm trying to use the Bing search engine tool, and I get the "Object of type SearchEngineType is not JSON serializable" error when executing the agent.create_agent(agent_config) line.
Here is my code, and I don't know what I'm missing that makes this error happen. Any help appreciated:

class Agent:
    def __init__(self, host, port):
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")

async def run_main(host: str, port: int, stream: bool = True):
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[
            SearchToolDefinition(
                engine=SearchEngineType.bing,
                api_key=os.getenv("BING_SEARCH_API_KEY"),
            )
        ],
        tool_choice=ToolChoice.auto,
    )