Open krrishdholakia opened 2 months ago
Code for repro
# Reproduction: time an end-to-end litellm tool-calling completion.
import os
import time

from litellm import completion

# set env
os.environ["ANTHROPIC_API_KEY"] = "sk-..."

# One weather-lookup tool, in OpenAI function-calling format.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]

messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]

# Measure only the completion() call itself.
start_time = time.time()
response = completion(
    model="anthropic/claude-3-opus-20240229",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)
end_time = time.time()
elapsed = round(end_time - start_time, 5)
print(f"Total time: {elapsed}")
# Baseline: the same request through the raw Anthropic SDK, with the client
# constructed once, outside the timed section.
import os
import time  # needed for the timing below; the original snippet relied on an earlier import

from anthropic import Anthropic

client = Anthropic(
    # This is the default and can be omitted
    api_key=os.environ.get("ANTHROPIC_API_KEY"),
)

# One weather-lookup tool, in Anthropic's native tool schema.
tools = [
    {
        "name": "get_weather",
        "description": "Get the current weather in a given location",
        "input_schema": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA"
                }
            },
            "required": ["location"]
        }
    }
]

# Measure only the API call. Reuse the `tools` list defined above — the
# original defined it but never used it, re-inlining an identical copy here.
start_time = time.time()
response = client.beta.tools.messages.create(
    model="claude-3-opus-20240229",
    max_tokens=1024,
    tools=tools,
    messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
)
end_time = time.time()
print(f"Total time: {round(end_time - start_time, 5)}")
I suggest adding OpenTelemetry (OTel) tracing to litellm itself so we can analyze which part takes time.
I suspect this is caused by client initialization — in the Anthropic example, the client is created outside the timed call.
what would good otel tracing look like?
What happened?
Seeing a ~1s latency difference between the litellm SDK and the Anthropic SDK for tool calling on Claude 3 Opus.
I believe this is caused by creating a new client on each call
Relevant log output
Twitter / LinkedIn details
No response