langchain-ai / langchain

🦜🔗 Build context-aware reasoning applications
https://python.langchain.com
MIT License
88.69k stars 13.94k forks source link

JsonOutputParser fails at times when the LLM encloses the output JSON within ``` json ... ``` #23297

Open deathsaber opened 2 weeks ago

deathsaber commented 2 weeks ago

Checked other resources

Example Code

from langchain_core.output_parsers import JsonOutputParser

msg = "what queries must i run?"

class Step(BaseModel):
    step_name: str = Field(
        description="...")
    tool_to_use: str = Field(description="...")
    tool_input: str = Field(description="...")
    depends_on: List[str] = Field(
        description="...")

class PlanOutput(BaseModel):
    task: str = Field(description="...")
    steps: List[Step] = Field(description="...")

parser = JsonOutputParser(pydantic_object=PlanOutput)

llm = ChatOpenAI(...)
chain = ChatPromptTemplate.from_messages([('user', '...{input} Your output must follow this format: {format}')]) | llm | parser

chain.invoke({'format': parser.get_format_instructions(), "input": msg})

Error Message and Stack Trace (if applicable)

2024-06-22 11:21:03,116 - agent.py - 90 - ERROR - Traceback (most recent call last): File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/output_parsers/json.py", line 66, in parse_result return parse_json_markdown(text) File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/utils/json.py", line 147, in parse_json_markdown return _parse_json(json_str, parser=parser) File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/utils/json.py", line 160, in _parse_json return parser(json_str) File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/utils/json.py", line 120, in parse_partial_json return json.loads(s, strict=strict) File "/usr/lib/python3.9/json/init.py", line 359, in loads return cls(**kw).decode(s) File "/usr/lib/python3.9/json/decoder.py", line 337, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "/usr/lib/python3.9/json/decoder.py", line 355, in raw_decode raise JSONDecodeError("Expecting value", s, err.value) from None json.decoder.JSONDecodeError: Expecting value: line 6 column 22 (char 109)

The above exception was the direct cause of the following exception:

Traceback (most recent call last): File "/mnt/d/python_projects/azure-openai-qa-bot/nat-sql/src/agent.py", line 69, in talk for s in ap.app.stream({"task": inp, 'session_id': sid}, config=args): File "/root/classifier/.venv/lib/python3.9/site-packages/langgraph/pregel/init.py", line 963, in stream _panic_or_proceed(done, inflight, step) File "/root/classifier/.venv/lib/python3.9/site-packages/langgraph/pregel/init.py", line 1489, in _panic_or_proceed raise exc File "/usr/lib/python3.9/concurrent/futures/thread.py", line 58, in run result = self.fn(*self.args, self.kwargs) File "/root/classifier/.venv/lib/python3.9/site-packages/langgraph/pregel/retry.py", line 66, in run_with_retry task.proc.invoke(task.input, task.config) File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 2399, in invoke input = step.invoke( File "/root/classifier/.venv/lib/python3.9/site-packages/langgraph/utils.py", line 95, in invoke ret = context.run(self.func, input, kwargs) File "/mnt/d/python_projects/azure-openai-qa-bot/nat-sql/src/action_plan.py", line 138, in _plan_steps plan = self.planner.invoke({"task": state['task'], 'chat_history': hist if not self.no_mem else [], File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 2399, in invoke input = step.invoke( File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/output_parsers/base.py", line 169, in invoke return self._call_with_config( File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 1509, in _call_with_config context.run( File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/runnables/config.py", line 365, in call_func_with_variable_args return func(input, **kwargs) # type: ignore[call-arg] File "/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/output_parsers/base.py", line 170, in lambda inner_input: self.parse_result( File 
"/root/classifier/.venv/lib/python3.9/site-packages/langchain_core/output_parsers/json.py", line 69, in parse_result raise OutputParserException(msg, llm_output=text) from e langchain_core.exceptions.OutputParserException: Invalid json output: ```json { "task": "what Queries tmust i run", "steps": [ { "step_name": "Step#1", "tool_to_use": Document_Search_Tool, "tool_input": "What queries must I run?", "depends_on": [] } ] }

Description

Sometimes, despite adding a JSON output parser to the LLM chain, the LLM may enclose the generated JSON within a fenced code block, i.e. ```json ... ``` tags.

This causes the JSON output parser to fail. It would be nice if the parser could check for this enclosure and remove it before parsing the JSON.

image

System Info

langchain==0.2.1
langchain-chroma==0.1.1
langchain-cli==0.0.24
langchain-community==0.2.1
langchain-core==0.2.3
langchain-openai==0.1.8
langchain-text-splitters==0.2.0
langchain-visualizer==0.0.33
langchainhub==0.1.19
spike-spiegel-21 commented 2 weeks ago

@deathsaber JsonOutputParser works completely fine by detecting the valid json.

{
    "task": "what Queries tmust i run",
    "steps": [
        {
            "step_name": "Step#1",
            "tool_to_use": Document_Search_Tool, #not valid, should be "Document_Search_Tool" 
            "tool_input": "What queries must I run?",
            "depends_on": []
        }
    ]
}

This is not valid JSON output. Try providing some few-shot examples along with the prompt to generate consistent and valid results.