Closed cumthxy closed 9 months ago
share prompt, i can't do anything with this.
share prompt, i can't do anything with this.
Try using this instead: https://github.com/jxnl/instructor/blob/main/examples/vision/run_table.py — I'll delete this example and replace it with markdown.
from openai import OpenAI
from io import StringIO
from typing import Annotated, Any, Iterable
from openai import OpenAI
from pydantic import (
BaseModel,
BeforeValidator,
PlainSerializer,
InstanceOf,
WithJsonSchema,
)
import pandas as pd
from tomlkit import table
import instructor
# Wrap the OpenAI client with instructor, selecting the MD_JSON mode.
# NOTE(review): `client` is assigned again further down via
# `instructor.patch(OpenAI())` with no `mode`, which replaces this client
# before the request is made -- confirm which configuration is intended.
client = instructor.patch(OpenAI(), mode=instructor.function_calls.Mode.MD_JSON)
def md_to_df(data: Any) -> Any:
    """Convert a pipe-delimited markdown table into a DataFrame.

    String input is parsed with ``pandas.read_csv`` using ``|`` as the
    separator. The parsed column at position 1 becomes the index (for rows
    that start with ``|``, position 0 is the empty text before the first
    pipe), columns that are entirely empty are dropped, the first data row
    (the ``|---|---|`` separator of a markdown table) is discarded, and
    surrounding whitespace is stripped from every text cell.

    Column labels and index values are left exactly as parsed.

    Args:
        data: Markdown table text, or any other object.

    Returns:
        A ``pandas.DataFrame`` when ``data`` is a string; otherwise ``data``
        itself, unchanged.
    """
    if not isinstance(data, str):
        return data

    frame = (
        pd.read_csv(StringIO(data), sep="|", index_col=1)
        .dropna(axis=1, how="all")
        .iloc[1:]
    )

    def _strip(cell: Any) -> Any:
        # Empty or missing cells are parsed as NaN (a float), which has no
        # ``strip``; only text cells carry whitespace to remove.
        return cell.strip() if isinstance(cell, str) else cell

    # ``DataFrame.map`` only exists in pandas >= 2.1; older releases expose
    # the same element-wise operation as ``applymap``. The check is made on
    # the class so a column that happens to be named "map" cannot interfere.
    if hasattr(pd.DataFrame, "map"):
        return frame.map(_strip)
    return frame.applymap(_strip)
# Annotated field type: a pandas DataFrame that is accepted from, and
# serialized back to, markdown text.
MarkdownDataFrame = Annotated[
    # The validated value must be an instance of pandas.DataFrame.
    InstanceOf[pd.DataFrame],
    # md_to_df runs on the raw input first, turning a markdown string into a
    # DataFrame (non-string input is passed through untouched).
    BeforeValidator(md_to_df),
    # Serialization emits the DataFrame's markdown rendering.
    PlainSerializer(lambda x: x.to_markdown()),
    # JSON schema advertised for the field: a plain string plus a description.
    WithJsonSchema(
        {
            "type": "string",
            "description": """
The markdown representation of the table,
each one should be tidy, do not try to join tables
that should be seperate""",
        }
    ),
]
# One extracted table. Documented with comments rather than a class docstring
# because pydantic copies a model's docstring into the generated JSON schema
# description, which would alter the schema this model produces.
class Table(BaseModel):
    # Caption text for the table (presumably a short title -- the code only
    # constrains it to be a string).
    caption: str
    # Table contents, validated and serialized through MarkdownDataFrame
    # (defined earlier in this file).
    dataframe: MarkdownDataFrame
# NOTE(review): this rebinds `client`, replacing the MD_JSON-mode client
# created near the top of the file with one patched using instructor's
# default mode -- confirm which configuration is intended.
client = instructor.patch(OpenAI())

# Request a chat completion; `response_model=Iterable[Table]` asks instructor
# to hand back an iterable of Table objects instead of the raw completion.
tables = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=Iterable[Table],
    messages=[
        {
            "role": "system",
            "content": "Please extract the tables from the following text, merge as much as possible:",
        },
        {
            "role": "user",
            "content": """
My name is John and I am 25 years old. I live in
New York and I like to play basketball. His name is
Mike and he is 30 years old. He lives in San Francisco
and he likes to play baseball. Sarah is 20 years old
and she lives in Los Angeles. She likes to play tennis.
Her name is Mary and she is 35 years old.
She lives in Chicago.
""",
        },
    ],
)

# The loop variable is named `extracted` rather than `table` so it does not
# overwrite the `table` imported from tomlkit at the top of the file.
for extracted in tables:
    print(extracted.caption)
    print(extracted.dataframe)
    print()

# Sample output, kept as a bare string literal (it has no runtime effect).
"""
People
Age City Hobby
Name
John 25 New York Basketball
Mike 30 San Francisco Baseball
Sarah 20 Los Angeles Tennis
Mary 35 Chicago N/A
"""
Thanks for finding the regression; I'm going to delete the example and use this one instead.
from openai import OpenAI
from io import StringIO
from typing import Annotated, Any, Iterable
from openai import OpenAI
from pydantic import (
    BaseModel,
    BeforeValidator,
    PlainSerializer,
    InstanceOf,
    WithJsonSchema,
)
import pandas as pd
import instructor

# Wrap the OpenAI client with instructor, selecting the MD_JSON mode.
# NOTE(review): `client` is rebound below without `mode` -- confirm which
# configuration is intended.
client = instructor.patch(OpenAI(), mode=instructor.function_calls.Mode.MD_JSON)


def md_to_df(data: Any) -> Any:
    """Convert a pipe-delimited markdown table into a DataFrame.

    String input is parsed with ``pandas.read_csv`` using ``|`` as the
    separator; the parsed column at position 1 becomes the index, columns
    that are entirely empty are dropped, the first data row (the markdown
    ``|---|---|`` separator) is discarded, and whitespace is stripped from
    every text cell. Any non-string input is returned unchanged.
    """
    if not isinstance(data, str):
        return data

    frame = (
        pd.read_csv(StringIO(data), sep="|", index_col=1)
        .dropna(axis=1, how="all")
        .iloc[1:]
    )

    def _strip(cell: Any) -> Any:
        # Empty or missing cells are parsed as NaN (a float), which has no
        # ``strip``; only text cells carry whitespace to remove.
        return cell.strip() if isinstance(cell, str) else cell

    # ``DataFrame.map`` only exists in pandas >= 2.1; older releases expose
    # the same element-wise operation as ``applymap``.
    if hasattr(pd.DataFrame, "map"):
        return frame.map(_strip)
    return frame.applymap(_strip)


# Annotated field type: a pandas DataFrame that is accepted from, and
# serialized back to, markdown text.
MarkdownDataFrame = Annotated[
    InstanceOf[pd.DataFrame],
    BeforeValidator(md_to_df),
    PlainSerializer(lambda x: x.to_markdown()),
    WithJsonSchema(
        {
            "type": "string",
            "description": """ The markdown representation of the table, each one should be tidy, do not try to join tables that should be seperate""",
        }
    ),
]


# One extracted table. Comments are used instead of a class docstring because
# pydantic copies a model's docstring into the generated JSON schema.
class Table(BaseModel):
    # Caption text for the table.
    caption: str
    # Table contents, validated and serialized through MarkdownDataFrame.
    dataframe: MarkdownDataFrame


client = instructor.patch(OpenAI())

# `response_model=Iterable[Table]` asks instructor to hand back an iterable of
# Table objects instead of the raw completion.
tables = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=Iterable[Table],
    messages=[
        {
            "role": "system",
            "content": "Please extract the tables from the following text, merge as much as possible:",
        },
        {
            "role": "user",
            "content": """ My name is John and I am 25 years old. I live in New York and I like to play basketball. His name is Mike and he is 30 years old. He lives in San Francisco and he likes to play baseball. Sarah is 20 years old and she lives in Los Angeles. She likes to play tennis. Her name is Mary and she is 35 years old. She lives in Chicago. """,
        },
    ],
)

for table in tables:
    print(table.caption)
    print(table.dataframe)
    print()

# Sample output, kept as a bare string literal (it has no runtime effect).
""" People Age City Hobby Name John 25 New York Basketball Mike 30 San Francisco Baseball Sarah 20 Los Angeles Tennis Mary 35 Chicago N/A """
Thanks for finding the regression; I'm going to delete the example and use this one instead.
Why do I get an error with the old code?
probably the llm changed
Traceback (most recent call last): File "/Users/huangxingyu/daily/chatgpt/1.py", line 65, in <module>
df = dataframe(
File "/Users/huangxingyu/daily/chatgpt/1.py", line 61, in dataframe
return Dataframe.from_response(completion)
File "/Users/huangxingyu/anaconda3/envs/python3.9/lib/python3.9/site-packages/instructor/function_calls.py", line 138, in from_response
return cls.model_validate_json(
File "/Users/huangxingyu/anaconda3/envs/python3.9/lib/python3.9/site-packages/pydantic/main.py", line 532, in model_validate_json
return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context)
pydantic_core._pydantic_core.ValidationError: 4 validation errors for Dataframe
data.0
Input should be an object [type=model_type, input_value=['John', 25, 'New York', 'basketball'], input_type=list]
For further information visit https://errors.pydantic.dev/2.5/v/model_type
data.1
Input should be an object [type=model_type, input_value=['Mike', 30, 'San Francisco', 'baseball'], input_type=list]
For further information visit https://errors.pydantic.dev/2.5/v/model_type
data.2
Input should be an object [type=model_type, input_value=['Sarah', 20, 'Los Angeles', 'tennis'], input_type=list]
For further information visit https://errors.pydantic.dev/2.5/v/model_type
data.3
Input should be an object [type=model_type, input_value=['Mary', 35, 'Chicago', None], input_type=list]
For further information visit https://errors.pydantic.dev/2.5/v/model_type