Closed HabermannR closed 3 weeks ago
Hi @HabermannR, thanks for reporting this.
"my old use case does not work at all anymore" - wanted to dig into this. Did this exact schema you've provided used to work with an older version of LM Studio? If so, would you be able to share what version?
The current process of using structured output is too difficult to use correctly — we will work to fix this. For now, I was able to modify your schema slightly to get it to work correctly in both the UI and the Server, which can hopefully fix your use case in the short term.
UI:
{
"properties": {
"choices": {
"items": {
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"description": {
"title": "Description",
"type": "string"
},
"development": {
"enum": [
"magical",
"hybrid",
"practical"
],
"title": "DevelopmentType",
"type": "string"
},
"stance": {
"enum": [
"peaceful",
"neutral",
"aggressive"
],
"title": "DiplomaticStance",
"type": "string"
}
},
"required": [
"name",
"description",
"development",
"stance"
],
"title": "TribeType",
"type": "object"
},
"title": "Choices",
"type": "array"
}
},
"required": [
"choices"
],
"title": "InitialChoices",
"type": "object"
}
Server full example (python):
import json

from openai import OpenAI

# Point to the local LM Studio server (OpenAI-compatible endpoint).
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
model = "lmstudio-community/meta-llama-3.1-8b-instruct"

messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Create 1-3 characters"},
]

# OpenAI-style structured-output configuration.  "strict": True together with
# "additionalProperties": False on every object keeps the model from inventing
# extra keys; "minItems": 1 requires at least one character in the array.
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "test_schema",
        "strict": True,
        "schema": {
            "properties": {
                "choices": {
                    "items": {
                        "properties": {
                            "name": {"title": "Name", "type": "string"},
                            "description": {"title": "Description", "type": "string"},
                            "development": {
                                "enum": ["magical", "hybrid", "practical"],
                                "title": "DevelopmentType",
                                "type": "string",
                            },
                            "stance": {
                                "enum": ["peaceful", "neutral", "aggressive"],
                                "title": "DiplomaticStance",
                                "type": "string",
                            },
                        },
                        "required": ["name", "description", "development", "stance"],
                        "title": "TribeType",
                        "type": "object",
                        "additionalProperties": False,
                    },
                    "title": "Choices",
                    "type": "array",
                    "minItems": 1,
                }
            },
            "required": ["choices"],
            "title": "InitialChoices",
            "type": "object",
            "additionalProperties": False,
        },
    },
}

response = client.chat.completions.create(
    model=model,
    messages=messages,
    response_format=response_format,
)

# Parse and pretty-print the structured JSON the model returned.
content = response.choices[0].message.content
parsed_content = json.loads(content)
formatted_content = json.dumps(parsed_content, indent=2)
print(formatted_content)
That gives me output:
{
"choices": [
{
"name": "Eira Shadowglow",
"description": "A mysterious and agile young woman with long silver hair and piercing emerald eyes.",
"development": "practical",
"stance": "aggressive"
},
{
"name": "Kael Darkhaven",
"description": "A brooding yet charismatic man with jet-black hair and intense indigo eyes, often shrouded in the shadows.",
"development": "magical",
"stance": "aggressive"
},
{
"name": "Lila Moonwhisper",
"description": "An enchanting woman with long, curly brown hair and a warm, golden smile that lights up a room.",
"development": "magical",
"stance": "peaceful"
}
]
}
Would you be able to test if this works for you?
Thanks for coming back so soon! It worked great in 0.3.4. Your approach works in the GUI, as well as in code, and even parsing back into an object works! Now the question is how to get this JSON schema? I use: def create_json_schema(model: Type[BaseModel]) -> Dict[str, Any]: schema = model.model_json_schema() return {"type": "json_schema", "json_schema": {"schema": schema}}
Which results in: {'type': 'json_schema', 'json_schema': {'schema': {'$defs': {'DevelopmentType': {'enum': ['magical', 'hybrid', 'practical'], 'title': 'DevelopmentType', 'type': 'string'}, 'DiplomaticStance': {'enum': ['peaceful', 'neutral', 'aggressive'], 'title': 'DiplomaticStance', 'type': 'string'}, 'TribeType': {'properties': {'name': {'title': 'Name', 'type': 'string'}, 'description': {'title': 'Description', 'type': 'string'}, 'development': {'$ref': '#/$defs/DevelopmentType'}, 'stance': {'$ref': '#/$defs/DiplomaticStance'}}, 'required': ['name', 'description', 'development', 'stance'], 'title': 'TribeType', 'type': 'object'}}, 'properties': {'choices': {'items': {'$ref': '#/$defs/TribeType'}, 'title': 'Choices', 'type': 'array'}}, 'required': ['choices'], 'title': 'InitialChoices', 'type': 'object'}}}
I will try a bit, but it would be best to just be able to give the LLM the json directly created from the pydantic class, like it worked in 0.3.4
Ah hah! You have caught a bug in the Pydantic use case :) Working on a fix
Thanks! Great news! But maybe still worth thinking about simplifying the output on my end?
Fix coming in the next updates/betas that will enable:
from typing import Any, Dict, List, Literal, Type

import json

from openai import OpenAI
from pydantic import (
    BaseModel,
    ConfigDict,
)


# Pydantic models describing the expected structured output.
class TribeType(BaseModel):
    # extra='forbid' emits "additionalProperties": false in the generated
    # JSON schema, which strict structured output requires.
    model_config = ConfigDict(extra='forbid')

    name: str
    description: str
    development: Literal["magical", "hybrid", "practical"]
    stance: Literal["peaceful", "neutral", "aggressive"]


class InitialChoices(BaseModel):
    model_config = ConfigDict(extra='forbid')  # This sets additionalProperties to false

    choices: List[TribeType]


def create_json_schema(model: Type[BaseModel]) -> Dict[str, Any]:
    """Wrap a Pydantic model's JSON schema in the OpenAI response_format envelope."""
    schema = model.model_json_schema()
    return {
        "type": "json_schema",
        "json_schema": {
            "name": "test_schema",
            "strict": True,
            "schema": schema,
        },
    }


# Point to the local LM Studio server.
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
model = "lmstudio-community/meta-llama-3.1-8b-instruct"
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Create 1-3 characters"}
]

# Build the schema once and reuse it for both the debug print and the request
# (the original computed it twice).
response_format = create_json_schema(InitialChoices)
print(response_format)

response = client.chat.completions.create(
    model=model,
    messages=messages,
    response_format=response_format,
)

content = response.choices[0].message.content
parsed_content = json.loads(content)
formatted_content = json.dumps(parsed_content, indent=2)
print(formatted_content)
The problem was that we don't currently have support for the $ref/$defs format.
Great, thanks!
# Claude gave me this code, which is working:
def resolve_refs(schema: dict, definitions: dict) -> dict:
    """Recursively resolve all $ref references in a JSON schema.

    Every ``{"$ref": "#/$defs/Name"}`` node is replaced by the referenced
    definition (itself resolved), and the ``$defs`` table is dropped from
    the result.  NOTE(review): a self-referential (recursive) model would
    recurse forever here — fine for these flat schemas, but worth confirming
    before using with recursive Pydantic models.
    """
    if isinstance(schema, dict):
        if '$ref' in schema:
            ref_path = schema['$ref']
            if ref_path.startswith('#/$defs/'):
                ref_name = ref_path.split('/')[-1]
                return resolve_refs(definitions[ref_name], definitions)
        # Recurse into children; strip the now-redundant $defs table.
        return {k: resolve_refs(v, definitions) for k, v in schema.items() if k != '$defs'}
    elif isinstance(schema, list):
        return [resolve_refs(item, definitions) for item in schema]
    else:
        # Scalars (strings, numbers, booleans, None) pass through unchanged.
        return schema


def create_json_schema(model: Type[BaseModel]) -> Dict[str, Any]:
    """Build the OpenAI ``response_format`` payload from a Pydantic model,
    inlining all $ref/$defs so servers without $ref support accept it."""
    schema = model.model_json_schema()
    # Store the definitions, then resolve all references against them.
    definitions = schema.get('$defs', {})
    resolved_schema = resolve_refs(schema, definitions)
    return {
        "type": "json_schema",
        "json_schema": {
            "name": "test_schema",
            "strict": True,
            "schema": resolved_schema,
        },
    }
Awesome!
Latest beta available here: https://lmstudio.ai/beta-releases
You guys are crazy! Can confirm it works. And I also had this bug — great that you fixed it: "Fix for pasting text from Microsoft Word giving an error about pasting an image"
Release notes: Update llama.cpp-based JSON response generation; now supports more complex JSON schemas
But for me, my old use case does not work at all anymore: setting this in the GUI works when calling it like this: completion = self.client.chat.completions.create( model=self.model_config.model_name, messages=messages)
{ "type": "object", "schema": { "properties": { "choices": { "items": { "properties": { "name": { "title": "Name", "type": "string" }, "description": { "title": "Description", "type": "string" }, "development": { "enum": [ "magical", "hybrid", "practical" ], "title": "DevelopmentType", "type": "string" }, "stance": { "enum": [ "peaceful", "neutral", "aggressive" ], "title": "DiplomaticStance", "type": "string" } }, "required": [ "name", "description", "development", "stance" ], "title": "TribeType", "type": "object" }, "title": "Choices", "type": "array" } }, "required": [ "choices" ], "title": "InitialChoices", "type": "object" } }
but I can not call it with the "object": completion = self.client.chat.completions.create( model=self.model_config.model_name, messages=messages, response_format=create_json_schema(response_model), max_tokens=max_tokens ) openai.BadRequestError: Error code: 400 - {'error': "'response_format.type' must be 'json_schema'"}
Setting it to 'json_schema', both in the GUI as well as in the Python code, gives an error: openai.BadRequestError: Error code: 400 - {'error': ' Invalid structured output configuration: data/type must be equal to one of the allowed values, data/type must be array, data/type must match a schema in anyOf. Error Data: n/a, Additional Data: n/a'}