I tried applying this change:
diff --git a/llm/cli.py b/llm/cli.py
index 941831c..d454719 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -250,7 +250,13 @@ def prompt(
bits.append(prompt)
prompt = " ".join(bits)
- if prompt is None and not save and sys.stdin.isatty():
+ if (
+ prompt is None
+ and not save
+ and sys.stdin.isatty()
+ and not attachments
+ and not attachment_types
+ ):
# Hang waiting for input to stdin (unless --save)
prompt = sys.stdin.read()
return prompt
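The intent of the change is that prompt stays None when only attachments are provided, instead of the CLI hanging while it waits for terminal input. Here is a standalone sketch of the patched logic (resolve_prompt is a hypothetical name, not the real cli.py function):

import sys

def resolve_prompt(prompt, save, attachments, attachment_types):
    # Only hang waiting for terminal input when there is truly nothing
    # else to send: no prompt text, no --save and no attachments.
    if (
        prompt is None
        and not save
        and sys.stdin.isatty()
        and not attachments
        and not attachment_types
    ):
        prompt = sys.stdin.read()
    return prompt

# With an attachment and no prompt text, prompt now stays None,
# which downstream plugin code has to handle:
resolve_prompt(None, save=False, attachments=["audio.mp3"], attachment_types=[])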
And ran this:
llm -m gpt-4o-audio-preview \
-a https://static.simonwillison.net/static/2024/pelican-joke-request.mp3
But I got this error:
Error: Error code: 400 - {'error': {'message': "Invalid chat format. Expected 'text' field in text type content part to be a string.", 'type': 'invalid_request_error', 'param': None, 'code': None}}
So it looks like this change needs to be supported by individual model plugins. In this case the OpenAI plugin has a problem, because prompt.prompt can now be None in some cases.
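Concretely, the unpatched plugin builds a text content part whose text value is None, which is exactly the shape the API rejects. An illustrative payload (values elided, not actual plugin output):

attachment_message = [
    {"type": "text", "text": None},  # not a string, triggers the 400
    {"type": "input_audio", "input_audio": {"data": "...", "format": "mp3"}},
]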
This fix works for OpenAI:
diff --git a/llm/cli.py b/llm/cli.py
index 941831c..d454719 100644
--- a/llm/cli.py
+++ b/llm/cli.py
@@ -250,7 +250,13 @@ def prompt(
bits.append(prompt)
prompt = " ".join(bits)
- if prompt is None and not save and sys.stdin.isatty():
+ if (
+ prompt is None
+ and not save
+ and sys.stdin.isatty()
+ and not attachments
+ and not attachment_types
+ ):
# Hang waiting for input to stdin (unless --save)
prompt = sys.stdin.read()
return prompt
diff --git a/llm/default_plugins/openai_models.py b/llm/default_plugins/openai_models.py
index 81d0cc0..6944df6 100644
--- a/llm/default_plugins/openai_models.py
+++ b/llm/default_plugins/openai_models.py
@@ -346,9 +346,11 @@ class Chat(Model):
)
current_system = prev_response.prompt.system
if prev_response.attachments:
- attachment_message = [
- {"type": "text", "text": prev_response.prompt.prompt}
- ]
+ attachment_message = []
+ if prev_response.prompt.prompt:
+ attachment_message.append(
+ {"type": "text", "text": prev_response.prompt.prompt}
+ )
for attachment in prev_response.attachments:
attachment_message.append(_attachment(attachment))
messages.append({"role": "user", "content": attachment_message})
@@ -362,7 +364,9 @@ class Chat(Model):
if not prompt.attachments:
messages.append({"role": "user", "content": prompt.prompt})
else:
- attachment_message = [{"type": "text", "text": prompt.prompt}]
+ attachment_message = []
+ if prompt.prompt:
+ attachment_message.append({"type": "text", "text": prompt.prompt})
for attachment in prompt.attachments:
attachment_message.append(_attachment(attachment))
messages.append({"role": "user", "content": attachment_message})
That ensures that llm -c "another about a walrus" works too.
With that diff in place the example invocation produced this after I ran llm logs -c --json:
[
{
"id": "01jbzyk4r4tjdcpjtmar7wymcf",
"model": "gpt-4o-audio-preview",
"prompt": null,
"system": null,
"prompt_json": {
"messages": [
{
"role": "user",
"content": [
{
"type": "input_audio",
"input_audio": {
"data": "...",
"format": "mp3"
}
}
]
}
]
},
"options_json": {},
"response": "Why did the pelican get kicked out of the restaurant?\nBecause he had a very big bill!",
"response_json": {
"content": "Why did the pelican get kicked out of the restaurant?\nBecause he had a very big bill!",
"finish_reason": "stop",
"usage": {
"completion_tokens": 20,
"prompt_tokens": 55,
"total_tokens": 75,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 44
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
},
"id": "chatcmpl-AQSjeGXYIwHvDsPxk4E2gs3pxUDKG",
"object": "chat.completion.chunk",
"model": "gpt-4o-audio-preview-2024-10-01",
"created": 1730870350
},
"conversation_id": "01jbzyk4r25b0bk49n51temd5w",
"duration_ms": 2887,
"datetime_utc": "2024-11-06T05:19:07.703285",
"conversation_name": "",
"conversation_model": "gpt-4o-audio-preview",
"attachments": [
{
"id": "baa308f3b1597f093eca7da609293c4b420061af1a88b0008e2bf338126b5e67",
"type": "audio/mpeg",
"path": null,
"url": "https://static.simonwillison.net/static/2024/pelican-joke-request.mp3",
"content_length": null
}
]
},
{
"id": "01jbzympq60bjaqb6dyad67jme",
"model": "gpt-4o-audio-preview",
"prompt": "another about a walrus",
"system": null,
"prompt_json": {
"messages": [
{
"role": "user",
"content": [
{
"type": "input_audio",
"input_audio": {
"data": "...",
"format": "mp3"
}
}
]
},
{
"role": "assistant",
"content": "Why did the pelican get kicked out of the restaurant?\nBecause he had a very big bill!"
},
{
"role": "user",
"content": "another about a walrus"
}
]
},
"options_json": {},
"response": "Why do walruses never make a good basketball team?\nBecause they're always afraid of getting called for \"foul\"!",
"response_json": {
"content": "Why do walruses never make a good basketball team?\nBecause they're always afraid of getting called for \"foul\"!",
"finish_reason": "stop",
"usage": {
"completion_tokens": 24,
"prompt_tokens": 88,
"total_tokens": 112,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 44
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
},
"id": "chatcmpl-AQSkT3NBfQD9c2h2jjOEjwt0y5Uyp",
"object": "chat.completion.chunk",
"model": "gpt-4o-audio-preview-2024-10-01",
"created": 1730870401
},
"conversation_id": "01jbzyk4r25b0bk49n51temd5w",
"duration_ms": 2715,
"datetime_utc": "2024-11-06T05:19:59.049365",
"conversation_name": "",
"conversation_model": "gpt-4o-audio-preview",
"attachments": []
}
]
I'm OK with this solution for the moment - I think it's fine that plugins should consider the case where there are attachments but no prompt.
I'll add that to the advanced plugin documentation.
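The documented pattern for plugin authors would look something like this (a sketch of the guidance, not the final docs wording; make_attachment_part stands in for whatever helper a plugin actually uses):

content = []
if prompt.prompt:  # may be None when only attachments were provided
    content.append({"type": "text", "text": prompt.prompt})
for attachment in prompt.attachments:
    content.append(make_attachment_part(attachment))  # hypothetical helper
messages.append({"role": "user", "content": content})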
I checked and this works for images too:
llm -m gpt-4o-mini -a joke-about-pelicans-and-walruses.png
Why did the pelicans and walruses decide to live together?
Because they wanted to create a fin-tastic community—where everyone can flap around and haul out together!
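For images the same code path applies, with the attachment serialized as an image_url content part. Roughly this shape (illustrative, not the plugin's exact output):

content = []
if prompt_text:  # None here, so no text part is added
    content.append({"type": "text", "text": prompt_text})
content.append(
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
)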
llm-gemini breaks on this:
llm -m gemini-1.5-flash-latest -a joke-about-pelicans-and-walruses.png
Error: * GenerateContentRequest.contents[0].parts[0].data: required oneof field 'data' must have one initialized field
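That error suggests llm-gemini ends up sending a part with no fields set when prompt.prompt is None, so the analogous fix would be to skip the text part entirely. A rough sketch of building a Gemini-style request body (illustrative, not llm-gemini's actual code):

parts = []
if prompt_text:  # assumed to hold prompt.prompt, which may be None
    parts.append({"text": prompt_text})
for mime_type, b64_data in attachments:  # assumed (mime type, base64 data) pairs
    parts.append({"inline_data": {"mime_type": mime_type, "data": b64_data}})
body = {"contents": [{"role": "user", "parts": parts}]}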
Originally posted by @simonw in https://github.com/simonw/llm/issues/608#issuecomment-2458739809