I can't reproduce your issue following the doc. Please give a complete script that reproduces it.
from typing import Dict, List, Optional, TypedDict
import uuid

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
)
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
from decouple import config
import warnings

# warnings.filterwarnings("ignore")

OPENAI_API_KEY = config("OPENAI_API_KEY")
GPT_MODEL = config("GPT_MODEL")

# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality.
# 2) Introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted).
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked "
            "to extract, return null for the attribute's value.",
        ),
        # ↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓
        MessagesPlaceholder("examples"),  # <-- EXAMPLES!
        # ↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑
        ("human", "{text}"),
    ]
)


class Person(BaseModel):
    """Information about a person."""

    # ^ Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    name: Optional[str] = Field(..., description="The name of the person")
    hair_color: Optional[str] = Field(
        ..., description="The color of the person's hair if known"
    )
    height_in_meters: Optional[str] = Field(..., description="Height in METERs")


class Data(BaseModel):
    """Extracted data about people."""

    # Creates a model so that we can extract multiple entities.
    people: List[Person]


class Example(TypedDict):
    """A representation of an example consisting of text input and expected tool calls.

    For extraction, the tool calls are represented as instances of pydantic model.
    """

    input: str  # This is the example text
    tool_calls: List[BaseModel]  # Instances of pydantic model that should be extracted


def tool_example_to_messages(example: Example) -> List[BaseMessage]:
    """Convert an example into a list of messages that can be fed into an LLM.

    This code is an adapter that converts our example to a list of messages
    that can be fed into a chat model.

    The list of messages per example corresponds to:
    1) HumanMessage: contains the content from which content should be extracted.
    2) AIMessage: contains the extracted information from the model.
    3) ToolMessage: contains confirmation to the model that the model requested a tool correctly.

    The ToolMessage is required because some of the chat models are hyper-optimized for agents
    rather than for an extraction use case.
    """
    messages: List[BaseMessage] = [HumanMessage(content=example["input"])]
    tool_calls = []
    for tool_call in example["tool_calls"]:
        tool_calls.append(
            {
                "id": str(uuid.uuid4()),
                "args": tool_call.dict(),
                # The name of the function right now corresponds
                # to the name of the pydantic model.
                # This is implicit in the API right now,
                # and will be improved over time.
                "name": tool_call.__class__.__name__,
            },
        )
    messages.append(AIMessage(content="", tool_calls=tool_calls))
    tool_outputs = example.get("tool_outputs") or [
        "You have correctly called this tool."
    ] * len(tool_calls)
    for output, tool_call in zip(tool_outputs, tool_calls):
        messages.append(ToolMessage(content=output, tool_call_id=tool_call["id"]))
    return messages


examples = [
    (
        "The ocean is vast and blue. It's more than 20,000 feet deep. There are many fish in it.",
        Person(name=None, height_in_meters=None, hair_color=None),
    ),
    (
        "Fiona traveled far from France to Spain.",
        Person(name="Fiona", height_in_meters=None, hair_color=None),
    ),
]

messages = []
for text, tool_call in examples:
    messages.extend(
        tool_example_to_messages({"input": text, "tool_calls": [tool_call]})
    )

llm = ChatOpenAI(model_name=GPT_MODEL, temperature=0, openai_api_key=OPENAI_API_KEY)

chain = prompt | llm.with_structured_output(
    schema=Data,
    method="function_calling",
    include_raw=False,
)

response = chain.invoke(
    {
        "text": "My name is Harrison. My hair is black.",
        "examples": messages,
    }
)
print(f"{response}\n")

response = chain.invoke(
    {
        "text": "The solar system is large, but earth has only 1 moon.",
        "examples": messages,
    }
)
print(f"{response}\n")
Could you please reformat your code using the "code block" functionality?
I have reformatted the code. I am using the following packages:
Python Version: 3.11.9
langchain Version: 0.2.5
langgraph Version: 0.1.1
langchain_core Version: 0.2.9
langchain_community Version: 0.2.5
langchain_openai Version: 0.1.9
openai Version: 1.35.3
My apologies. I still can't reproduce.
My env:
Python Version: 3.9.19
langchain==0.2.5
langchain-community==0.2.5
langchain-core==0.2.9
langchain-experimental==0.0.61
langchain-openai==0.1.8
openai==1.30.5
GPT_MODEL = "gpt-3.5-turbo-0125"
I have just created a conda env based on your configuration.
Model "gpt-3.5-turbo-0125" gives following results:
people=[Person(name='Harrison', hair_color='black', height_in_meters='null')]
people=[Person(name='Fiona', hair_color='blonde', height_in_meters='1.65')]
Model "gpt-4o" gives following results:
people=[Person(name='Harrison', hair_color='black', height_in_meters=None)]
people=[]
Does the error depend on the Python version?
I tried Python 3.11 as well; it has no problem. The problem is with openai version 1.35.3.
I am also getting the error with the following env (Python 3.11.9):
Python Version: 3.11.9
langchain Version: 0.2.5
langchain_core Version: 0.2.9
langchain_community Version: 0.2.5
langchain_openai Version: 0.1.9
openai Version: 1.30.5
Error:
people=[Person(name='Harrison', hair_color='black', height_in_meters='null')]
Traceback (most recent call last):
  File "Z:\llm_images\extract_info.py", line 148, in <module>
    response = chain.invoke(
               ^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\runnables\base.py", line 2504, in invoke
    input = step.invoke(input, config)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\output_parsers\base.py", line 169, in invoke
    return self._call_with_config(
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\runnables\base.py", line 1598, in _call_with_config
    context.run(
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\runnables\config.py", line 380, in call_func_with_variable_args
    return func(input, **kwargs)  # type: ignore[call-arg]
           ^^^^^^^^^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\output_parsers\base.py", line 170, in <lambda>
    lambda inner_input: self.parse_result(
                        ^^^^^^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\output_parsers\openai_tools.py", line 196, in parse_result
    pydantic_objects.append(name_dict[res["type"]](**res["args"]))
                            ~~~~~~~~~^^^^^^^^^^^^^
KeyError: 'Person'
You're right. Whether the error is triggered in a particular run depends on the response OpenAI returned. My current investigation shows it has to do with which data schema OpenAI decides to use. In the runs below: in the 1st run, it returns the JSON as an array of people and decides to use the "Data" schema, which results in correct output parsing; in the 2nd run, it returns the JSON as one single person and decides to use the "Person" schema, which is not available (because we pass schema=Data to the LLM).
json result [{'args': {'people': [{'name': 'Harrison', 'hair_color': 'black', 'height_in_meters': 'null'}]}, 'type': 'Data'}]
name_dict: {'Data': <class '__main__.Data'>}
people=[Person(name='Harrison', hair_color='black', height_in_meters='null')]
json result [{'args': {'name': None, 'hair_color': None, 'height_in_meters': None}, 'type': 'Person'}]
name_dict: {'Data': <class '__main__.Data'>}
Traceback (most recent call last):
...
see https://smith.langchain.com/public/800cd078-115c-495b-9280-fedbd2e83c4f/r
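To make the failure mode concrete, the failing line in parse_result (see the traceback above) boils down to a dictionary lookup. Here is a minimal sketch of that lookup, simplified from what the traceback and debug output show rather than copied from the langchain_core source:

# The parser builds its name -> model mapping only from the schema passed to
# with_structured_output, so it knows "Data" but not "Person".
name_dict = {"Data": Data}
# A response where the model chose the "Person" schema:
res = {
    "type": "Person",
    "args": {"name": None, "hair_color": None, "height_in_meters": None},
}
name_dict[res["type"]](**res["args"])  # KeyError: 'Person'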
The LLM knows that it only has the tool "Data", but since the examples in our code demonstrate a function call to "Person", it confuses the LLM and causes it to return a function call to "Person" as well.
So maybe you can try fixing the examples as below?
examples = [
    (
        "The ocean is vast and blue. It's more than 20,000 feet deep. There are many fish in it.",
        Data(people=[]),
    ),
    (
        "Fiona traveled far from France to Spain.",
        Data(people=[Person(name="Fiona", height_in_meters=None, hair_color=None)]),
    ),
]
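As a quick sanity check (a sketch reusing the tool_example_to_messages helper from the script above), the example tool calls now carry the name "Data", which matches the only schema the output parser knows about:

msgs = tool_example_to_messages(
    {
        "input": "Fiona traveled far from France to Spain.",
        "tool_calls": [
            Data(people=[Person(name="Fiona", height_in_meters=None, hair_color=None)])
        ],
    }
)
# msgs[1] is the AIMessage holding the example tool call.
print(msgs[1].tool_calls[0]["name"])  # -> "Data"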
Now it is working fine. Thank you very much.
@eyurtsev I think this issue can be closed; an update for the issue has been merged.
URL
https://python.langchain.com/v0.2/docs/how_to/extraction_examples/
Issue with current documentation:
Getting the following error:
Traceback (most recent call last):
  File "Z:\llm_images\extract_info.py", line 148, in <module>
    response = chain.invoke(
               ^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\runnables\base.py", line 2504, in invoke
    input = step.invoke(input, config)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\output_parsers\base.py", line 169, in invoke
    return self._call_with_config(
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\runnables\base.py", line 1598, in _call_with_config
    context.run(
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\runnables\config.py", line 380, in call_func_with_variable_args
    return func(input, **kwargs)  # type: ignore[call-arg]
           ^^^^^^^^^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\output_parsers\base.py", line 170, in <lambda>
    lambda inner_input: self.parse_result(
                        ^^^^^^^^^^^^^^^^^^
  File "Z:\conda_env\ccs\Lib\site-packages\langchain_core\output_parsers\openai_tools.py", line 196, in parse_result
    pydantic_objects.append(name_dict[res["type"]](**res["args"]))
                            ~~~~~~~~~^^^^^^^^^^^^^
KeyError: 'Person'