Hey @manantyagi25,
I apologize, but it looks like this is going to take a bit more time, which means we can't quickly update the templates to include o1. However, there is a workaround: you can create an application from code that uses the same code as our template and adds o1 as an option. Here's the code you should use (`app.py`):
```python
import agenta as ag
import litellm

# Drop request params a provider doesn't support instead of raising an error
litellm.drop_params = True

# Model choices shown in the playground dropdown, grouped by provider
supported_llm_models = {
    "Mistral AI": [
        "mistral/mistral-tiny",
        "mistral/mistral-small",
        "mistral/mistral-medium",
        "mistral/mistral-large-latest",
    ],
    "Open AI": [
        "gpt-3.5-turbo-1106",
        "gpt-3.5-turbo",
        "gpt-4",
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-4-1106-preview",
        "o1-preview",
        "o1-mini",
    ],
    "Gemini": ["gemini/gemini-1.5-pro-latest", "gemini/gemini-1.5-flash"],
    "Cohere": [
        "cohere/command-light",
        "cohere/command-r-plus",
        "cohere/command-nightly",
    ],
    "Anthropic": [
        "anthropic/claude-3-5-sonnet-20240620",
        "anthropic/claude-3-opus-20240229",
        "anthropic/claude-3-sonnet-20240229",
        "anthropic/claude-3-haiku-20240307",
        "anthropic/claude-2.1",
        "anthropic/claude-2",
        "anthropic/claude-instant-1.2",
        "anthropic/claude-instant-1",
    ],
    "Anyscale": [
        "anyscale/meta-llama/Llama-2-13b-chat-hf",
        "anyscale/meta-llama/Llama-2-70b-chat-hf",
    ],
    "Perplexity AI": [
        "perplexity/pplx-7b-chat",
        "perplexity/pplx-70b-chat",
        "perplexity/pplx-7b-online",
        "perplexity/pplx-70b-online",
    ],
    "DeepInfra": [
        "deepinfra/meta-llama/Llama-2-70b-chat-hf",
        "deepinfra/meta-llama/Llama-2-13b-chat-hf",
        "deepinfra/codellama/CodeLlama-34b-Instruct-hf",
        "deepinfra/mistralai/Mistral-7B-Instruct-v0.1",
        "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1",
    ],
    "Together AI": [
        "together_ai/togethercomputer/llama-2-70b-chat",
        "together_ai/togethercomputer/llama-2-70b",
        "together_ai/togethercomputer/LLaMA-2-7B-32K",
        "together_ai/togethercomputer/Llama-2-7B-32K-Instruct",
        "together_ai/togethercomputer/llama-2-7b",
        "together_ai/togethercomputer/alpaca-7b",
        "together_ai/togethercomputer/CodeLlama-34b-Instruct",
        "together_ai/togethercomputer/CodeLlama-34b-Python",
        "together_ai/WizardLM/WizardCoder-Python-34B-V1.0",
        "together_ai/NousResearch/Nous-Hermes-Llama2-13b",
        "together_ai/Austism/chronos-hermes-13b",
    ],
    "Aleph Alpha": [
        "luminous-base",
        "luminous-base-control",
        "luminous-extended-control",
        "luminous-supreme",
    ],
    "OpenRouter": [
        "openrouter/openai/gpt-3.5-turbo",
        "openrouter/openai/gpt-3.5-turbo-16k",
        "openrouter/anthropic/claude-instant-v1",
        "openrouter/google/palm-2-chat-bison",
        "openrouter/google/palm-2-codechat-bison",
        "openrouter/meta-llama/llama-2-13b-chat",
        "openrouter/meta-llama/llama-2-70b-chat",
    ],
    "Groq": [
        "groq/llama3-8b-8192",
        "groq/llama3-70b-8192",
        "groq/llama2-70b-4096",
        "groq/mixtral-8x7b-32768",
        "groq/gemma-7b-it",
    ],
}

prompts = {
    "system_prompt": "You are an expert in geography.",
    "user_prompt": """What is the capital of {country}?""",
}

# Models that support OpenAI's JSON response format
GPT_FORMAT_RESPONSE = ["gpt-3.5-turbo-1106", "gpt-4-1106-preview"]

ag.init()
ag.config.default(
    temperature=ag.FloatParam(default=1, minval=0.0, maxval=2.0),
    model=ag.GroupedMultipleChoiceParam(
        default="gpt-3.5-turbo", choices=supported_llm_models
    ),
    max_tokens=ag.IntParam(-1, -1, 4000),
    prompt_system=ag.TextParam(prompts["system_prompt"]),
    prompt_user=ag.TextParam(prompts["user_prompt"]),
    top_p=ag.FloatParam(1),
    frequency_penalty=ag.FloatParam(default=0.0, minval=-2.0, maxval=2.0),
    presence_penalty=ag.FloatParam(default=0.0, minval=-2.0, maxval=2.0),
    force_json=ag.BinaryParam(False),
)


@ag.instrument(spankind="llm")
async def llm_call(prompt_system: str, prompt_user: str):
    response_format = (
        {"type": "json_object"}
        if ag.config.force_json and ag.config.model in GPT_FORMAT_RESPONSE
        else {"type": "text"}
    )
    # max_tokens == -1 means "no limit"; pass None to litellm in that case
    max_tokens = ag.config.max_tokens if ag.config.max_tokens != -1 else None

    # Include frequency_penalty and presence_penalty only if supported
    completion_params = {}
    if ag.config.model in GPT_FORMAT_RESPONSE:
        completion_params["frequency_penalty"] = ag.config.frequency_penalty
        completion_params["presence_penalty"] = ag.config.presence_penalty

    response = await litellm.acompletion(
        **{
            "model": ag.config.model,
            "messages": [
                {"content": prompt_system, "role": "system"},
                {"content": prompt_user, "role": "user"},
            ],
            "temperature": ag.config.temperature,
            "max_tokens": max_tokens,
            "top_p": ag.config.top_p,
            "response_format": response_format,
            **completion_params,
        }
    )
    token_usage = response.usage.dict()
    return {
        "message": response.choices[0].message.content,
        "usage": token_usage,
        "cost": litellm.cost_calculator.completion_cost(
            completion_response=response, model=ag.config.model
        ),
    }


@ag.entrypoint
@ag.instrument()
async def generate(
    inputs: ag.DictInput = ag.DictInput(default_keys=["country"]),
):
    # Substitute the inputs into the prompt templates; fall back to the raw
    # template if a placeholder is missing
    try:
        prompt_user = ag.config.prompt_user.format(**inputs)
    except Exception:
        prompt_user = ag.config.prompt_user
    try:
        prompt_system = ag.config.prompt_system.format(**inputs)
    except Exception:
        prompt_system = ag.config.prompt_system

    if ag.config.force_json and ag.config.model not in GPT_FORMAT_RESPONSE:
        raise ValueError(
            "Model {} does not support JSON response format".format(ag.config.model)
        )

    response = await llm_call(prompt_system=prompt_system, prompt_user=prompt_user)
    return {
        "message": response["message"],
        "usage": response.get("usage", None),
        "cost": response.get("cost", None),
    }
```
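Note that `supported_llm_models` is a plain dict fed into `ag.GroupedMultipleChoiceParam`, so you can expose any other litellm-compatible model the same way. As a hypothetical example (check litellm's docs for the exact model identifiers you need):

```python
# Hypothetical example: add another OpenAI model to the playground dropdown.
# Any model string that litellm accepts can be listed here.
supported_llm_models["Open AI"].append("gpt-4-turbo")
```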
In `requirements.txt`, use:

```
openai
agenta
litellm
```

In `.env`, use:

```
OPENAI_API_KEY=sk-xxx
```

Create all these files in one folder. Then, after running `pip install -U agenta`, execute `agenta init` and `agenta variant serve app.py`. You should now be able to see an app in Agenta that includes the o1-preview and o1-mini options.
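If you want to sanity-check that your key can reach the o1 models before serving the app, a minimal standalone litellm sketch (assuming `OPENAI_API_KEY` is set in your environment) might look like this; it sends only a user message, since the o1 models are stricter about parameters such as system prompts and temperature:

```python
import asyncio

import litellm

litellm.drop_params = True  # silently drop params the o1 models reject


async def main():
    response = await litellm.acompletion(
        model="o1-mini",
        messages=[{"content": "What is the capital of France?", "role": "user"}],
    )
    print(response.choices[0].message.content)


asyncio.run(main())
```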
Hope this helps!
**Is your feature request related to a problem? Please describe.**
GPT o1 is not available in the UI currently.

**Describe the solution you'd like**
Add the option of GPT o1 in the models dropdown.

**Describe alternatives you've considered**
Tried to modify the openapi.json, but since it comes from agenta-cli and is deeply nested, we couldn't optimally add it ourselves.

**Additional context**
We want to compare the accuracy of the new model, hence we wanted to choose it, but since it wasn't available in the list of models, we are currently blocked on our analysis. It would be really helpful if this gets done soon.