openai / openai-python

The official Python library for the OpenAI API
https://pypi.org/project/openai/
Apache License 2.0
23.16k stars 3.27k forks source link

Structured Outputs via function calling - Descriptions for important keys #1794

Closed rohanbalkondekar closed 1 month ago

rohanbalkondekar commented 1 month ago

Confirm this is a feature request for the Python library and not the underlying OpenAI API.

Describe the feature or improvement you're requesting

I like the new SDK helper to parse the model's output:

from pydantic import BaseModel
from openai import OpenAI

# Client created with default arguments (presumably credentials come from the environment — confirm).
client = OpenAI()

# Pydantic model that is passed as `response_format` in the call below.
class ResearchPaperExtraction(BaseModel):
    title: str
    authors: list[str]
    abstract: str
    keywords: list[str] # How do I add a description for this or any of the other parameters?
    # How does the model know exactly how to use this? Will it infer it on its own, or do we specify it in the system message?

# Send the system/user messages and request a reply structured as ResearchPaperExtraction.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "You are an expert at structured data extraction. You will be given unstructured text from a research paper and should convert it into the given structure."},
        {"role": "user", "content": "..."}
    ],
    response_format=ResearchPaperExtraction,
)

# Parsed object attached to the first choice's message.
research_paper = completion.choices[0].message.parsed

But how do I get the granularity of a manual schema when using the SDK objects? Mainly, I want to add detailed descriptions for the parameters:

{
    "name": "get_weather",
    "description": "Fetches the weather in the given location",
    "strict": true,
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The location to get the weather for"
            },
            "unit": {
                "type": "string",
                "description": "The unit to return the temperature in",
                "enum": ["F", "C"]
            }
        },
        "additionalProperties": false,
        "required": ["location", "unit"]
    }
}

Snippets taken from the official OpenAI Documentation.

Additional context

Please improve the documentation. How can I submit requests to improve it? Also, I get no response from the OpenAI Developer Forum, so I am raising an issue here (@RobertCraigie is a good guy :) ).

rohanbalkondekar commented 1 month ago

Example:

import openai
from rich import print
from openai import OpenAI
from dotenv import load_dotenv
from pydantic import BaseModel, Field, field_validator
from datetime import datetime
from typing import List
from enum import Enum
import re

# Call load_dotenv() (imported from `dotenv`) before the client is created; its return value is discarded.
_ = load_dotenv()
# NOTE(review): presumably OpenAI() picks up its API key from the environment prepared above — confirm.
client = OpenAI()

from pydantic import BaseModel, Field, field_validator
from datetime import datetime
from typing import List
from enum import Enum
import re

# Allowed gender values. The `str` mixin makes every member a string as well,
# and the printed tool schema lists the member values under 'enum'.
# Documented with `#` comments so the class's `__doc__` is left unchanged.
class Gender(str, Enum):
    MALE = "Male"
    FEMALE = "Female"

# Supported kinds of identification document. The `str` mixin makes every member
# a string as well, and the printed tool schema lists the values under 'enum'.
# Documented with `#` comments so the class's `__doc__` is left unchanged.
class IDType(str, Enum):
    PASSPORT = "passport"
    GCC_ID = "gcc_id"

# Identification document of a passenger: its type, number and expiry date.
# Documented with `#` comments rather than a class docstring: the printed tool
# schema shows a model's docstring as its 'description' (see PassengerList), so
# adding a docstring here would change the generated schema.
class DocumentID(BaseModel):
    # Each `description` below is emitted verbatim in the generated tool schema.
    id_type: IDType = Field(
        ...,
        description="Type of identification document, either 'passport' or 'gcc_id'",
    )
    id_number: str = Field(
        ...,
        description="Identification document number without spaces or special characters",
    )
    expiry_date: str = Field(
        ...,
        description="Expiry date of the identification document in YYYY-MM-DD format"
    )

    @field_validator("expiry_date")
    @classmethod
    def validate_expiry_date(cls, value: str) -> str:
        """Accept only a real calendar date written exactly as YYYY-MM-DD.

        Returns:
            The unchanged input string when it is valid.

        Raises:
            ValueError: if the value is not a valid date or is not zero-padded.
        """
        message = "expiry_date must be in YYYY-MM-DD format"
        try:
            parsed = datetime.strptime(value, "%Y-%m-%d")
        except ValueError as exc:
            raise ValueError(message) from exc
        # strptime treats the leading zero of %m/%d as optional ("2030-1-5"
        # parses), so compare against the canonical ISO form to enforce the
        # zero-padded format promised by the field description.
        if parsed.date().isoformat() != value:
            raise ValueError(message)
        return value

    @field_validator("id_number")
    @classmethod
    def validate_id_number(cls, value: str) -> str:
        """Accept only a non-empty run of ASCII letters and digits.

        Returns:
            The unchanged input string when it is valid.

        Raises:
            ValueError: if the value is empty or contains any other character.
        """
        if not re.fullmatch(r"[A-Za-z0-9]+", value):
            raise ValueError("id_number must not contain spaces or special characters")
        return value

# A single passenger together with their identification document.
# Documented with `#` comments rather than a class docstring: the printed tool
# schema shows a model's docstring as its 'description' (see PassengerList), so
# adding a docstring here would change the generated schema.
class Passenger(BaseModel):
    # Each `description` below is emitted verbatim in the generated tool schema.
    id: str = Field(
        ...,
        description="Unique identifier for the passenger"
    )
    name: str = Field(
        ...,
        description="Full name of the passenger"
    )
    nationality: str = Field(
        ...,
        description="Nationality of the passenger. Convert the user input to ISO 3166-1 alpha-2 formatted country code yourself"
    )
    gender: Gender = Field(
        ...,
        description="Gender of the passenger. Either Male or Female"
    )
    birth_date: str = Field(
        ...,
        description="Birth date of the passenger in YYYY-MM-DD format"
    )
    # Nested model; the printed schema references it through '$ref'.
    document_id: DocumentID

    @field_validator("birth_date")
    @classmethod
    def validate_birth_date(cls, value: str) -> str:
        """Accept only a real calendar date written exactly as YYYY-MM-DD.

        Returns:
            The unchanged input string when it is valid.

        Raises:
            ValueError: if the value is not a valid date or is not zero-padded.
        """
        message = "birth_date must be in YYYY-MM-DD format"
        try:
            parsed = datetime.strptime(value, "%Y-%m-%d")
        except ValueError as exc:
            raise ValueError(message) from exc
        # strptime treats the leading zero of %m/%d as optional ("1990-1-5"
        # parses), so compare against the canonical ISO form to enforce the
        # zero-padded format promised by the field description.
        if parsed.date().isoformat() != value:
            raise ValueError(message)
        return value

# Top-level model handed to `openai.pydantic_function_tool`.
# The class docstring is runtime text: the printed tool schema reproduces it as
# the 'description' of both the function and its parameters object.
class PassengerList(BaseModel):
    """List of Passengers"""

    # The field `description` is likewise copied into the generated schema.
    passengers: List[Passenger] = Field(
        ..., description="List containing passenger details"
    )

# Build a tool definition from the PassengerList model and wrap it in a list.
tools = [openai.pydantic_function_tool(PassengerList)]
# `print` here is the one imported from `rich`; its result is shown under "Output".
print(tools)

Output:

[
    {
        'type': 'function',
        'function': {
            'name': 'PassengerList',
            'strict': True,
            'parameters': {
                '$defs': {
                    'DocumentID': {
                        'properties': {
                            'id_type': {
                                'description': "Type of identification document, either 'passport' or 'gcc_id'",
                                'enum': ['passport', 'gcc_id'],
                                'title': 'IDType',
                                'type': 'string'
                            },
                            'id_number': {
                                'description': 'Identification document number without spaces or special characters',
                                'title': 'Id Number',
                                'type': 'string'
                            },
                            'expiry_date': {
                                'description': 'Expiry date of the identification document in YYYY-MM-DD format',
                                'title': 'Expiry Date',
                                'type': 'string'
                            }
                        },
                        'required': ['id_type', 'id_number', 'expiry_date'],
                        'title': 'DocumentID',
                        'type': 'object',
                        'additionalProperties': False
                    },
                    'Gender': {'enum': ['Male', 'Female'], 'title': 'Gender', 'type': 'string'},
                    'IDType': {'enum': ['passport', 'gcc_id'], 'title': 'IDType', 'type': 'string'},
                    'Passenger': {
                        'properties': {
                            'id': {
                                'description': 'Unique identifier for the passenger',
                                'title': 'Id',
                                'type': 'string'
                            },
                            'name': {
                                'description': 'Full name of the passenger',
                                'title': 'Name',
                                'type': 'string'
                            },
                            'nationality': {
                                'description': 'Nationality of the passenger. Convert the user input to ISO 3166-1 alpha-2 formatted country code yourself',
                                'title': 'Nationality',
                                'type': 'string'
                            },
                            'gender': {
                                'description': 'Gender of the passenger. Either Male or Female',
                                'enum': ['Male', 'Female'],
                                'title': 'Gender',
                                'type': 'string'
                            },
                            'birth_date': {
                                'description': 'Birth date of the passenger in YYYY-MM-DD format',
                                'title': 'Birth Date',
                                'type': 'string'
                            },
                            'document_id': {'$ref': '#/$defs/DocumentID'}
                        },
                        'required': ['id', 'name', 'nationality', 'gender', 'birth_date', 'document_id'],
                        'title': 'Passenger',
                        'type': 'object',
                        'additionalProperties': False
                    }
                },
                'description': 'List of Passengers',
                'properties': {
                    'passengers': {
                        'description': 'List containing passenger details',
                        'items': {'$ref': '#/$defs/Passenger'},
                        'title': 'Passengers',
                        'type': 'array'
                    }
                },
                'required': ['passengers'],
                'title': 'PassengerList',
                'type': 'object',
                'additionalProperties': False
            },
            'description': 'List of Passengers'
        }
    }
]