meta-llama / llama-stack

Model components of the Llama Stack APIs
MIT License
3.95k stars 523 forks source link

Object of type SearchEngineType is not JSON serializable #282

Open Pentar0o opened 4 days ago

Pentar0o commented 4 days ago

Hi,

I am trying to use the Bing search engine tool, and I get the error "Object of type SearchEngineType is not JSON serializable" when executing the agent.create_agent(agent_config) line.

Here is my code. I don't know what I'm missing that causes this error; any help is appreciated: class Agent: def __init__(self, host, port): self.client = LlamaStackClient(base_url=f"http://{host}:{port}")

def create_agent(self, agent_config: AgentConfig):
    """Create an agent from ``agent_config`` and open a session for it.

    The id of the created agent is stored in ``self.agent_id`` and the id of
    the session (always named ``"example_session"``) in ``self.session_id``.
    """
    created = self.client.agents.create(agent_config=agent_config)
    self.agent_id = created.agent_id

    # The session is opened for the agent that was just created.
    opened = self.client.agents.session.create(
        agent_id=created.agent_id, session_name="example_session"
    )
    self.session_id = opened.session_id

async def execute_turn(self, content: str):
    """Yield the streamed chunks of one user turn, minus the completion event.

    Sends ``content`` as a single user message to the agent/session stored on
    ``self`` with ``stream=True`` and yields every chunk whose payload
    ``event_type`` is not ``"turn_complete"``.
    """
    stream = self.client.agents.turn.create(
        agent_id=self.agent_id,
        session_id=self.session_id,
        messages=[UserMessage(content=content, role="user")],
        stream=True,
    )

    for event_chunk in stream:
        # The closing "turn_complete" event is filtered out of the stream.
        if event_chunk.event.payload.event_type == "turn_complete":
            continue
        yield event_chunk

async def run_main(host: str, port: int, stream: bool = True):
    """Build the agent configuration and create the agent on the server.

    Args:
        host: Host name of the Llama Stack server.
        port: Port of the Llama Stack server.
        stream: Accepted for interface compatibility; not used here.

    Errors raised while building the ``Agent`` or while creating the agent
    are printed rather than propagated.
    """
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[
            SearchToolDefinition(
                engine=SearchEngineType.bing,
                api_key=os.getenv("BING_SEARCH_API_KEY"),
            )
        ],
        tool_choice=ToolChoice.auto,
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")
        # Without an Agent instance the next step would only fail with a
        # NameError on the unbound ``agent`` variable, so stop here.
        return

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")
cheesecake100201 commented 4 days ago

Since you are using agents, you need to use AgentConfigToolSearchToolDefinition in the agent_config, not SearchToolDefinition. Check out the AgentConfigToolSearchToolDefinition object in the code base and use that. For future reference, whenever you use built-in tools in agent_config, use the AgentConfigTool* definitions, as they are all already present in the code.

Pentar0o commented 4 days ago

Thank you for the answer. I changed my code to:
agent_config = AgentConfig( model="Llama3.2-3B-Instruct", instructions="You are a helpful assistant and you answer in french concisely and precisely.", sampling_params=SamplingParams(temperature=0.0, top_p=0.9), enable_session_persistence=False, tools=[ AgentConfigToolSearchToolDefinition( engine="brave", api_key=os.getenv("BRAVE_SEARCH_API_KEY"), type="brave_search" ) ], tool_choice=ToolChoice.auto, )

But I still get the following error: Erreur Create Agent : Object of type SearchEngineType is not JSON serializable. Any idea what I'm missing?

cheesecake100201 commented 4 days ago

Can you send your full stack trace and the complete code, from start to end?

Pentar0o commented 4 days ago

Sure ! I have 0 error on stack : (base) penta@0o-Legion:~$ llama stack run Cortana --disable-ipv6 Using config /home/penta/.llama/builds/conda/Cortana-run.yaml Resolved 12 providers inner-inference => remote::ollama models => routing_table inference => autorouted inner-safety => meta-reference shields => routing_table safety => autorouted inner-memory => meta-reference memory_banks => __routing_table memory => autorouted agents => meta-reference telemetry => meta-reference inspect => builtin__

Initializing Ollama, checking connectivity to server... Serving API memory_banks GET /memory_banks/get GET /memory_banks/list POST /memory_banks/register Serving API safety POST /safety/run_shield Serving API shields GET /shields/get GET /shields/list POST /shields/register Serving API memory POST /memory/insert POST /memory/query Serving API telemetry GET /telemetry/get_trace POST /telemetry/log_event Serving API agents POST /agents/create POST /agents/session/create POST /agents/turn/create POST /agents/delete POST /agents/session/delete POST /agents/session/get POST /agents/step/get POST /agents/turn/get Serving API inspect GET /health GET /providers/list GET /routes/list Serving API inference POST /inference/chat_completion POST /inference/completion POST /inference/embeddings Serving API models GET /models/get GET /models/list POST /models/register

Listening on 0.0.0.0:5000 INFO: Started server process [3199] INFO: Waiting for application startup. INFO: Application startup complete. INFO: Uvicorn running on http://0.0.0.0:5000 (Press CTRL+C to quit)

And there is my code : import warnings from pathlib import Path import tiktoken from tiktoken.load import load_tiktoken_bpe from termcolor import cprint, colored import fire import asyncio import time from llama_stack_client import LlamaStackClient from llama_stack_client.types import UserMessage from llama_stack_client.lib.agents.event_logger import EventLogger from llama_stack_client.types.agent_create_params import AgentConfig, AgentConfigToolSearchToolDefinition, AgentConfigTool

from llama_stack_client.lib.agents import *
from llama_stack.apis.agents.agents import *
from llama_stack.apis.agents.client import *

import os from dotenv import load_dotenv

warnings.filterwarnings('ignore')

load_dotenv('fichier.env')

# Initialize tiktoken tokenizer

tokenizer_path = "tokenizer.model" num_reserved_special_tokens = 256

mergeable_ranks = load_tiktoken_bpe(tokenizer_path)

special_tokens = [ "<|begin_of_text|>", "<|end_of_text|>", "<|reserved_special_token_0|>", "<|reserved_special_token_1|>", "<|finetune_right_pad_id|>", "<|step_id|>", "<|start_header_id|>", "<|end_header_id|>", "<|eom_id|>", "<|eot_id|>", "<|python_tag|>", ] reserved_tokens = [ f"<|reserved_special_token_{2 + i}|>" for i in range(num_reserved_special_tokens - len(special_tokens)) ] special_tokens = special_tokens + reserved_tokens

tokenizer = tiktoken.Encoding( name=Path(tokenizer_path).name, pat_str=r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+", mergeable_ranks=mergeable_ranks, special_tokens={token: len(mergeable_ranks) + i for i, token in enumerate(special_tokens)}, )

# Define a list of colors

colors = ["red", "green", "yellow", "blue", "magenta", "cyan", "white"]

def colorize_tokens(text):
    """Return ``text`` rendered token by token, each token in a cycling color.

    ``text`` is encoded with the module-level ``tokenizer`` (all special
    tokens allowed). Each token is decoded on its own, wrapped with
    ``colored`` using the next entry of ``colors`` (wrapping around at the
    end of the list), and followed by a single space.
    """
    token_ids = tokenizer.encode(text, allowed_special="all")
    pieces = [
        colored(tokenizer.decode([token_id]), colors[position % len(colors)]) + " "
        for position, token_id in enumerate(token_ids)
    ]
    return "".join(pieces)

def count_tokens(text):
    """Return how many tokens ``tokenizer`` produces for ``text`` (special tokens allowed)."""
    token_ids = tokenizer.encode(text, allowed_special="all")
    return len(token_ids)

class Agent:
    """Wrapper around ``LlamaStackClient`` that tracks one agent and one session."""

    def __init__(self, host, port):
        """Build a client for the server at ``http://{host}:{port}``.

        The constructor must be named ``__init__``; a method called ``init``
        is never invoked on instantiation and would leave ``self.client``
        unset.
        """
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")
        # Filled in by create_agent(); None until an agent/session exists.
        self.agent_id = None
        self.session_id = None

    def create_agent(self, agent_config: AgentConfig):
        """Create an agent from ``agent_config`` and open a session for it.

        Stores the created agent's id in ``self.agent_id`` and the id of the
        session named ``"example_session"`` in ``self.session_id``.
        """
        agent = self.client.agents.create(
            agent_config=agent_config,
        )
        self.agent_id = agent.agent_id
        session = self.client.agents.session.create(
            agent_id=agent.agent_id,
            session_name="example_session",
        )
        self.session_id = session.session_id

    async def execute_turn(self, content: str):
        """Yield the streamed chunks of one user turn, minus the completion event.

        Sends ``content`` as a single user message with ``stream=True`` and
        yields every chunk whose payload ``event_type`` is not
        ``"turn_complete"``. Because this is an async generator, nothing is
        sent until the caller starts iterating.
        """
        response = self.client.agents.turn.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[
                UserMessage(content=content, role="user"),
            ],
            stream=True,
        )

        for chunk in response:
            if chunk.event.payload.event_type != "turn_complete":
                yield chunk

web_search = AgentConfigToolSearchToolDefinition( engine="brave", api_key=os.getenv("BRAVE_SEARCH_API_KEY"), type="brave_search" )

async def run_main(host: str, port: int, stream: bool = True):
    """Run an interactive chat loop against an agent on a Llama Stack server.

    Args:
        host: Host name of the Llama Stack server.
        port: Port of the Llama Stack server.
        stream: Accepted for interface compatibility; not used here.

    The loop reads a question from stdin, prints it token by token in color,
    streams the agent's answer through ``EventLogger`` and reports timing and
    token counts. Typing ``bye`` (any case) ends the session.
    """
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[web_search],
        tool_choice=ToolChoice.auto,
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")
        # No Agent instance: continuing would only raise NameError below.
        return

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")
        # Without an agent and session there is nothing to send turns to.
        return

    total_tokens = 0

    while True:
        # User input
        cprint("Vous: ", "green", end="")
        question = input()  # User types their question

        if question.lower() == "bye":
            print("Fin de la session.")
            break

        # Tokenize and colorize the user's input
        cprint("Tokenized question:", "yellow")
        colorized_question = colorize_tokens(question)
        print(colorized_question)

        # Count tokens
        question_tokens = count_tokens(question)
        cprint(f"Nombre de tokens dans la question: {question_tokens}", "cyan")

        # Start measuring time
        start_time = time.time()

        # Collected pieces of the logged response
        response_parts = []

        # execute_turn() is an async generator here, so calling it cannot
        # fail; errors only surface while the stream is consumed. The whole
        # consumption is therefore guarded, and a failed turn is skipped
        # instead of iterating an unbound or stale ``response``.
        try:
            response = agent.execute_turn(content=question)
            async for log in EventLogger().log(response):
                if log is not None:
                    log.print()
                    response_parts.append(str(log))
        except Exception as erreur:
            print(f"Erreur pendant l'exécution du tour: {erreur}")
            continue

        response_text = "".join(response_parts)

        # Time taken for inference
        end_time = time.time()
        time_taken = end_time - start_time
        cprint(f"Time taken for inference: {time_taken:.2f} seconds", "cyan")

        # Count response tokens
        response_tokens = count_tokens(response_text)
        cprint(f"Nombre de tokens dans la réponse: {response_tokens}", "cyan")

        # Total tokens check
        total_tokens_turn = question_tokens + response_tokens
        total_tokens += total_tokens_turn
        cprint(f"Nombre de Token de la session : {total_tokens}", "cyan")

        if total_tokens >= 4096:
            cprint("Attention: la limite de contexte de 4096 tokens est atteinte!", "red")

def main(host: str, port: int, stream: bool = True):
    """Synchronous entry point: run ``run_main`` to completion via ``asyncio.run``."""
    session = run_main(host, port, stream)
    asyncio.run(session)

if __name__ == "__main__": fire.Fire(main)

cheesecake100201 commented 4 days ago

First, it's self.client.agents.sessions.create and self.client.agents.turns.create. You have written session and turn, respectively, in their place, which could be causing an issue. Update that and let me know.

class Agent:
    """Holds a ``LlamaStackClient`` plus the ids of one agent and its session."""

    def __init__(self, host, port):
        """Build the client for ``http://{host}:{port}``; no agent exists yet."""
        self.client = LlamaStackClient(base_url=f"http://{host}:{port}")
        self.agent_id = self.session_id = None

    def create_agent(self, agent_config: AgentConfig):
        """Create an agent from ``agent_config`` and open ``"example_session"`` for it.

        The resulting ids are stored in ``self.agent_id`` and
        ``self.session_id``.
        """
        created = self.client.agents.create(agent_config=agent_config)
        self.agent_id = created.agent_id

        opened = self.client.agents.sessions.create(
            agent_id=created.agent_id, session_name="example_session"
        )
        self.session_id = opened.session_id

    def execute_turn(self, content: str):
        """Send ``content`` as one user message and return the streamed response.

        The stream is returned as-is; "turn_complete" events are no longer
        filtered out here, so callers see every chunk.
        """
        return self.client.agents.turns.create(
            agent_id=self.agent_id,
            session_id=self.session_id,
            messages=[UserMessage(content=content, role="user")],
            stream=True,
        )
Pentar0o commented 4 days ago

Still the same error with your modification : Erreur Create Agent : Object of type SearchEngineType is not JSON serializable

cheesecake100201 commented 4 days ago

I am unable to solve this because I can't see where SearchEngineType is being used in this code

Pentar0o commented 4 days ago

Here : web_search = AgentConfigToolSearchToolDefinition( engine="brave", api_key=os.getenv("BRAVE_SEARCH_API_KEY"), type="brave_search" )

async def run_main(host: str, port: int, stream: bool = True):
    """Build the agent configuration and create the agent on the server.

    Args:
        host: Host name of the Llama Stack server.
        port: Port of the Llama Stack server.
        stream: Accepted for interface compatibility; not used here.

    The search tool comes from the module-level ``web_search`` definition.
    Errors raised while building the ``Agent`` or while creating the agent
    are printed rather than propagated.
    """
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=SamplingParams(temperature=0.0, top_p=0.9),
        enable_session_persistence=False,
        tools=[web_search],
        tool_choice=ToolChoice.auto,
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")
        # Without an Agent instance the next step would only fail with a
        # NameError on the unbound ``agent`` variable, so stop here.
        return

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")
cheesecake100201 commented 4 days ago

You are using AgentConfigToolSearchToolDefinition and not SearchEngineType

class AgentConfigToolSearchToolDefinition(TypedDict, total=False):
    """Dictionary shape of a search tool entry.

    Declared with ``total=False``: only the keys wrapped in ``Required`` must
    be present; the others may be omitted.
    """

    # Required. API key, given as a string.
    api_key: Required[str]

    # Required. A plain string, either "bing" or "brave" (not an enum member).
    engine: Required[Literal["bing", "brave"]]

    # Required. The only accepted value is the string "brave_search".
    type: Required[Literal["brave_search"]]

    # Optional list of strings.
    # NOTE(review): presumably shield identifiers applied to tool input - confirm.
    input_shields: List[str]

    # Optional list of strings.
    # NOTE(review): presumably shield identifiers applied to tool output - confirm.
    output_shields: List[str]

    # Optional remote-execution settings; the type is declared elsewhere.
    remote_execution: AgentConfigToolSearchToolDefinitionRemoteExecution

This is its definition. SearchEngineType is a different object, which is an Enum.

Pentar0o commented 4 days ago

I've tried previously with SearchEngineType (check my first post) and I had the same error.

Pentar0o commented 3 days ago

With this code I no longer get the error, but web search still doesn't work: class Agent: def __init__(self, host, port): self.client = LlamaStackClient(base_url=f"http://{host}:{port}") self.agent_id = None self.session_id = None

def create_agent(self, agent_config: AgentConfig):
    """Create an agent from ``agent_config`` and open a uniquely named session.

    The created agent's id is stored in ``self.agent_id`` and the new
    session's id in ``self.session_id``.
    """
    created = self.client.agents.create(agent_config=agent_config)
    self.agent_id = created.agent_id

    # A fresh UUID4 string serves as the session name for every call.
    session_label = str(uuid.uuid4())

    opened = self.client.agents.session.create(
        agent_id=created.agent_id, session_name=session_label
    )
    self.session_id = opened.session_id

def execute_turn(self, content: str):
    """Send ``content`` as one user message and return the streamed response.

    The turn is created for the agent and session stored on ``self`` with
    ``stream=True``; the result of the call is returned unchanged.
    """
    user_turn = [UserMessage(content=content, role="user")]
    return self.client.agents.turn.create(
        agent_id=self.agent_id,
        session_id=self.session_id,
        messages=user_turn,
        stream=True,
    )

sampling_params = SamplingParams( strategy="greedy", temperature=0.0, top_p=0.9, max_tokens=256 )

websearch = [ AgentConfigToolSearchToolDefinition( type="brave_search", engine="brave", api_key=os.getenv("BRAVE_SEARCH_API_KEY") ) ]

# Inside run_main function

async def run_main(host: str, port: int, stream: bool = True):
    """Run an interactive chat loop against an agent on a Llama Stack server.

    Args:
        host: Host name of the Llama Stack server.
        port: Port of the Llama Stack server.
        stream: Accepted for interface compatibility; not used here.

    The agent is configured from the module-level ``sampling_params`` and
    ``websearch`` definitions. The loop reads a question from stdin, prints
    it token by token in color, then prints the model's text deltas as they
    arrive. Typing ``bye`` (any case) ends the session.
    """
    agent_config = AgentConfig(
        model="Llama3.2-3B-Instruct",
        instructions="You are a helpful assistant and you answer in french concisely and precisely.",
        sampling_params=sampling_params,
        enable_session_persistence=False,
        tools=websearch,
        tool_choice="auto",
        tool_prompt_format="function_tag",
    )

    try:
        agent = Agent(host, port)
    except Exception as erreur:
        print(f"Erreur Agent : {erreur}")
        # No Agent instance: continuing would only raise NameError below.
        return

    try:
        agent.create_agent(agent_config)
    except Exception as erreur:
        print(f"Erreur Create Agent : {erreur}")
        # Without an agent and session there is nothing to send turns to.
        return

    total_tokens = 0

    while True:
        # User input
        cprint("Vous: ", "green", end="")
        question = input()  # User types their question

        if question.lower() == "bye":
            print("Fin de la session.")
            break

        # Tokenize and colorize the user's input
        cprint("Tokenized question:", "yellow")
        colorized_question = colorize_tokens(question)
        print(colorized_question)

        # Count tokens
        question_tokens = count_tokens(question)
        cprint(f"Nombre de tokens dans la question: {question_tokens}", "cyan")

        # Start measuring time
        start_time = time.time()

        # Variable to hold the collected response
        response_text = ""

        # Both creating the turn and reading its stream can fail; guard them
        # together and skip the turn on error rather than iterating an
        # unbound (or previous) ``response``.
        try:
            response = agent.execute_turn(content=question)
            for chunk in response:
                payload = chunk.event.payload
                if payload.event_type == "turn_complete":
                    continue
                # Only payloads that carry a model text delta are printed.
                if hasattr(payload, 'text_delta_model_response'):
                    response_text += payload.text_delta_model_response
                    print(payload.text_delta_model_response, end='', flush=True)
        except Exception as erreur:
            print(f"Erreur pendant l'exécution du tour: {erreur}")
            continue

Any help appreciated.