langchain-ai / langgraph

Build resilient language agents as graphs.
https://langchain-ai.github.io/langgraph/
MIT License

Failed to parse response from model when using OllamaFunctions with bind_tools() #872

Closed. WenJett closed this issue 3 days ago.

WenJett commented 3 days ago

Example Code

from typing import Annotated, Callable, Literal, Optional
from typing_extensions import TypedDict
from datetime import datetime

from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.messages import ToolMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import Runnable, RunnableConfig, RunnableLambda
from langchain_core.tools import tool
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import END, StateGraph
from langgraph.graph.message import AnyMessage, add_messages
from langgraph.prebuilt import ToolNode, tools_condition

# Stack to keep track of the current task state
def update_dialog_stack(left: list[str], right: Optional[str]) -> list[str]:
    """Push or pop the state."""
    if right is None:
        return left
    if right == "pop":
        return left[:-1]
    return left + [right]

# state of the LLM
class State(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
    user_info: str
    dialog_state: Annotated[
        list[Literal["primary_assistant", "search_assistant"]],
        update_dialog_stack,
    ]

class Assistant:
    def __init__(self, runnable: Runnable):
        self.runnable = runnable

    def __call__(self, state: State, config: RunnableConfig):
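        # Keep invoking the runnable until it returns a tool call or non-empty content;
        # otherwise append a nudge message and re-prompt.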
        while True:
            result = self.runnable.invoke(state)

            if not result.tool_calls and (
                not result.content
                or isinstance(result.content, list)
                and not result.content[0].get("text")
            ):
                messages = state["messages"] + [("user", "Respond with a real output.")]
                state = {**state, "messages": messages}
            else:
                break
        return {"messages": result}

class CompleteOrEscalate(BaseModel):
    """A tool to mark the current task as completed and/or to escalate control of the dialog to the main assistant,
    who can re-route the dialog based on the user's needs."""

    cancel: bool = True
    reason: str

    class Config:
        schema_extra = {
            "example": {
                "cancel": True,
                "reason": "User changed their mind about the current task.",
            },
            "example 2": {
                "cancel": True,
                "reason": "I have fully completed the task.",
            },
            "example 3": {
                "cancel": False,
                "reason": "I need to have additional information from user to search.",
            },
        }

def handle_tool_error(state) -> dict:
    error = state.get("error")
    debug_logs = state.get("debug_logs", [])
    tool_calls = state["messages"][-1].tool_calls
    return {
        "messages": [
            ToolMessage(
                content=f"Error: {repr(error)}\nDebug Logs: {debug_logs}\nPlease fix your mistakes and try calling the function again.",
                tool_call_id=tc["id"],
            )
            for tc in tool_calls
        ]
    }

def create_tool_node_with_fallback(tools: list) -> dict:
    return ToolNode(tools).with_fallbacks(
        [RunnableLambda(handle_tool_error)], exception_key="error"
    )

def _print_event(event: dict, _printed: set, max_length=3000):
    current_state = event.get("dialog_state")
    if current_state:
        print("Currently in:", current_state[-1])
    message = event.get("messages")
    if message:
        if isinstance(message, list):
            message = message[-1]
        if message.id not in _printed:
            msg_repr = message.pretty_repr(html=True)
            if len(msg_repr) > max_length:
                msg_repr = msg_repr[:max_length] + " ... (truncated)"
            print(msg_repr)
            _printed.add(message.id)

# This node will be shared for exiting all specialized assistants
def pop_dialog_state(state: State) -> dict:
    """Pop the dialog stack and return to the main assistant.

    This lets the full graph explicitly track the dialog flow and delegate control
    to specific sub-graphs.
    """
    messages = []
    if state["messages"][-1].tool_calls:
        # Note: Doesn't currently handle the edge case where the llm performs parallel tool calls
        messages.append(
            ToolMessage(
                content="Resuming dialog with the host assistant. Please reflect on the past conversation and assist the user as needed.",
                tool_call_id=state["messages"][-1].tool_calls[0]["id"],
            )
        )
    return {
        "dialog_state": "pop",
        "messages": messages,
    }

# LLM model to be used; low temperature to keep the output deterministic
llm = OllamaFunctions(model="phi3:14b-medium-4k-instruct-q8_0", format="json", temperature=0, verbose=True)

## Class used to transfer from primary_assistant to search_assistant
class TosearchAssistant(BaseModel):
    """
    Transfers work to a specialized assistant that searches online.
    """

    request: str = Field(
        description="Any necessary follow-up questions the search assistant should clarify before proceeding."
    )

## Tool to run an online search for the given request
@tool
def online_query(request: str) -> str:
    """
    Description:
        Queries online via DuckDuckGo search.

    Output:
        Returns the search result.
    """
    search_tool = DuckDuckGoSearchRun(max_results=2)
    search_result = search_tool.invoke(request)
    return search_result

## prompt for the assistant
search_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a specialized assistant working to search. "
            "Remember that the report isn't completed until after the relevant tool has successfully been used. "
            "Do not invoke the tool until all the necessary information has been obtained. "
            'If none of your tools are appropriate for the task, then "CompleteOrEscalate" the dialog to the host assistant. '
            "Do not waste the user's time. Do not make up invalid tools or functions."
            "\nCurrent time: {time}",
        ),
        ("placeholder", "{messages}"),
    ]
).partial(time=datetime.now())

search_assistant_safe_tools = [online_query]
search_assistant_sensitive_tools = []
search_assistant_tools = search_assistant_safe_tools + search_assistant_sensitive_tools
search_assistant_runnable = search_assistant_prompt | llm.bind_tools(search_assistant_tools + [CompleteOrEscalate])

primary_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful support assistant. "
            "Your primary role is to handle the user's request. "
            "Delegate the task to the appropriate specialized assistant by invoking the corresponding tool. "
            "You may only search for the president yourself; pass any other search to the search assistant. "
            "You are not able to make these types of changes yourself. "
            "Only the specialized assistants are given permission to do this for the user. "
            "The user is not aware of the different specialized assistants, so do not mention them; just quietly delegate through function calls. "
            "Provide detailed information to the customer, and always double-check before concluding that information is unavailable. "
            "When searching, be persistent. Expand your query bounds once if the first search returns no results. "
            "\nCurrent time: {time}.",
        ),
        ("placeholder", "{messages}"),
    ]
).partial(time=datetime.now())
primary_assistant_tools = [DuckDuckGoSearchRun(max_results=1)]
assistant_runnable = primary_assistant_prompt | llm.bind_tools(primary_assistant_tools + [TosearchAssistant,])

## Entry node used when switching to a new assistant
def create_entry_node(assistant_name: str, new_dialog_state: str) -> Callable:
    def entry_node(state: State) -> dict:
        tool_call_id = state["messages"][-1].tool_calls[0]["id"]
        return {
            "messages": [
                ToolMessage(
                    content=f"The assistant is now the {assistant_name}. Reflect on the above conversation between the host assistant and the user."
                    f" The user's intent is unsatisfied. Use the provided tools to assist the user. Remember, you are {assistant_name},"
                    " and the task is not complete until after you have successfully invoked the appropriate tool."
                    " If the user changes their mind or needs help for other tasks, call the CompleteOrEscalate function to let the primary host assistant take control."
                    " Ask the user for information in order to perform the task. ",
                    tool_call_id=tool_call_id,
                )
            ],
            "dialog_state": new_dialog_state,
        }

    return entry_node

## Building of graph (search assistant portion)
builder = StateGraph(State)

builder.add_node("enter_search_assistant", create_entry_node("search Generation Assistant", "search_assistant")) # transferring from the primary assistant to the search_assistant
builder.add_node("search_assistant", Assistant(search_assistant_runnable)) # actual search_assistant node that has the tools to perform the task
builder.add_edge("enter_search_assistant", "search_assistant") # connects the two nodes
builder.add_node("search_assistant_sensitive_tools",create_tool_node_with_fallback(search_assistant_sensitive_tools),)
builder.add_node("search_assistant_safe_tools",create_tool_node_with_fallback(search_assistant_safe_tools),)

## Determine the possible routing for search_assistant
def route_search_Assistant(state: State,) -> Literal["search_assistant_sensitive_tools", "search_assistant_safe_tools", "leave_skill", "__end__",]:
    route = tools_condition(state)
    if route == END:
        return END
    tool_calls = state["messages"][-1].tool_calls
    did_cancel = any(tc["name"] == CompleteOrEscalate.__name__ for tc in tool_calls)
    if did_cancel:
        return "leave_skill"
    safe_toolnames = [t.name for t in search_assistant_safe_tools]
    if all(tc["name"] in safe_toolnames for tc in tool_calls):
        return "search_assistant_safe_tools"
    return "search_assistant_sensitive_tools"

builder.add_edge("search_assistant_sensitive_tools", "search_assistant")
builder.add_edge("search_assistant_safe_tools", "search_assistant")
builder.add_conditional_edges("search_assistant", route_search_Assistant)

# Primary assistant
builder.add_node("primary_assistant", Assistant(assistant_runnable))
builder.set_entry_point("primary_assistant")
builder.add_node("primary_assistant_tools", create_tool_node_with_fallback(primary_assistant_tools))

def route_primary_assistant(state: State,) -> Literal["primary_assistant_tools","enter_search_assistant","__end__",]:
    route = tools_condition(state)
    if route == END:
        return END
    tool_calls = state["messages"][-1].tool_calls
    if tool_calls:
        if tool_calls[0]["name"] == TosearchAssistant.__name__:
            return "enter_search_assistant"
        return "primary_assistant_tools"
    raise ValueError("Invalid route")

# The assistant can route to one of the delegated assistants, directly use a tool, or directly respond to the user
builder.add_conditional_edges(
    "primary_assistant",
    route_primary_assistant,
    {
        "enter_search_assistant": "enter_search_assistant",
        "primary_assistant_tools": "primary_assistant_tools",
        END: END,
    },
)
builder.add_edge("primary_assistant_tools", "primary_assistant")
builder.add_node("leave_skill", pop_dialog_state)
builder.add_edge("leave_skill", "primary_assistant")

# Each delegated workflow can directly respond to the user
# When the user responds, we want to return to the currently active workflow
def route_to_workflow(state: State,) -> Literal["primary_assistant","search_assistant",]:
    """If we are in a delegated state, route directly to the appropriate assistant."""
    dialog_state = state.get("dialog_state")
    if not dialog_state:
        return "primary_assistant"
    return dialog_state[-1]

config = {"configurable": {"thread_id": 403,}}

# Compile graph
memory = SqliteSaver.from_conn_string(":memory:")
final_graph = builder.compile(
    checkpointer=memory,
    # Let the user approve or deny the use of sensitive tools
    interrupt_before=["search_assistant_sensitive_tools"],
)

_printed = set()

while True:
    user_input = input("Please type your request below (type 'exit' to quit):\n\n")

    # Check if the user wants to exit
    if user_input.lower() == "exit":
        print("Exiting...")
        break

    # Put user input into the stream
    events = final_graph.stream({"messages": ("user", user_input)}, config, stream_mode="values")

    # Process events
    for event in events:
        _print_event(event, _printed)
    snapshot = final_graph.get_state(config)
    while snapshot.next:

        # We have an interrupt! The agent is trying to use a sensitive tool, and the user can approve or deny it
        user_approval = input(
            "Do you approve of the above actions? Type 'yes' to continue; otherwise, type 'no' and input a new request or type 'exit' to quit.\n\n"
        )
        if user_approval.lower().strip() == "yes":
            # Continue with the graph execution
            result = final_graph.invoke(
                None,
                config,
            )
        else:
            result = final_graph.invoke(
                {
                    "messages": [
                        ToolMessage(
                            tool_call_id=event["messages"][-1].tool_calls[0]["id"],
                            content=f"API call denied by user. Reasoning: '{user_approval}'. Revert the user back to the previous state.",
                        )
                    ]
                },
                config,
            )
        snapshot = final_graph.get_state(config)

Error Message and Stack Trace (if applicable)

ask the search assistant to find the population size of new york
================================== Ai Message ==================================
Tool Calls:
  duckduckgo_search (call_556e7a4ef6ea47038d98e5ece6d50563)
 Call ID: call_556e7a4ef6ea47038d98e5ece6d50563
  Args:
    query: New York State population
================================= Tool Message =================================
Name: duckduckgo_search

New York. QuickFacts provides statistics for all states and counties. Also for cities and towns with a population of 5,000 or more. ... In Vintage 2022, as a result of the formal request from the state, Connecticut transitioned from eight counties to nine planning regions. 7,604,523. Persons per household, 2018-2022. 2.55. Living in same house 1 year ago, percent of persons age 1 year+, 2018-2022. 89.8%. Language other than English spoken at home, percent of persons age 5 years+, 2018-2022. 30.6%. Computer and Internet Use. Households with a computer, percent, 2018-2022. The population in New York aged 65+ grew by 30.2% from 2010 to 2020, while the 85+ population grew by 13%. In 2020, New York State had the fourth largest populations of both same-sex married (48,442) and same-sex unmarried couples (35,096) in the country. New York, constituent state of the U.S., one of the 13 original colonies and states. Its capital is Albany and its largest city is New York City, the cultural and financial center of American life. Until the 1960s New York was the country's leading state in nearly all population, cultural, and economic indexes. The population of New York continues to decline faster than any other state, according to the latest estimates out Tuesday from the U.S. Census Bureau. With a net loss of just under 102,000 ...
================================== Ai Message ==================================
Tool Calls:
  duckduckgo_search (call_5847cf1707c24ee8bf51c265b61f0f99)
 Call ID: call_5847cf1707c24ee8bf51c265b61f0f99
  Args:
    query: population size of New York
================================= Tool Message =================================
Name: duckduckgo_search

This influx dramatically increased population density, making New York City one of the most densely populated cities in the United States. 1900s - The Modern Metropolis. ... New York City (NYC) Size: Approximately 468.9 square miles (1,214 km²). Population (as of 2021): Over 8.4 million. Basic Statistic New York-Newark-Jersey City metro area population in the U.S. 2010-2022 Premium Statistic Resident population of New York City, NY, by race 2022 New York City has a 2024 population of 7,931,147, according to data from World Population Review. World Population Review says New York City is currently declining at a rate of -2.49% annually and ... Yes, New York is the biggest city in the state of New York based on population. The second largest city, Buffalo, is 3% the size of New York. What is the population density of New York, New York? New York has a population density of 27,012.5 people per square mile. New York-Newark-Jersey City metro area population in the U.S. 2010-2022. Published by. Veera Korhonen , Nov 17, 2023. In 2022, about 19.62 million people populated the New York-Newark-Jersey City ...
Traceback (most recent call last):
  File "/home/user/jett/test.py", line 351, in <module>
    for event in events:
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langgraph/pregel/__init__.py", line 983, in stream
    _panic_or_proceed(done, inflight, step)
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langgraph/pregel/__init__.py", line 1537, in _panic_or_proceed
    raise exc
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langgraph/pregel/retry.py", line 72, in run_with_retry
    task.proc.invoke(task.input, task.config)
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_core/runnables/base.py", line 2502, in invoke
    input = step.invoke(input, config, **kwargs)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langgraph/utils.py", line 95, in invoke
    ret = context.run(self.func, input, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/jett/test.py", line 40, in __call__
    result = self.runnable.invoke(state)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_core/runnables/base.py", line 2504, in invoke
    input = step.invoke(input, config)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_core/runnables/base.py", line 4573, in invoke
    return self.bound.invoke(
           ^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py", line 170, in invoke
    self.generate_prompt(
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py", line 599, in generate_prompt
    return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py", line 456, in generate
    raise e
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py", line 446, in generate
    self._generate_with_cache(
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py", line 671, in _generate_with_cache
    result = self._generate(
             ^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/agentic/lib/python3.12/site-packages/langchain_experimental/llms/ollama_functions.py", line 418, in _generate
    raise ValueError(
ValueError: Failed to parse a response from phi3:14b-medium-4k-instruct-q8_0 output: {

"Population of New York State": "As of July 1, 2021, the estimated population of New York state was approximately 20.21 million according to the United States Census Bureau."}

Description

I am trying to implement OllamaFunctions in the chatbot example from Part 4 of the LangGraph documentation. However, I run into an issue: when a tool is called, it raises this "Failed to parse a response from model" error, even though the LLM has already retrieved the necessary information. The structure of the code and functions is copied over from the example provided in the documentation, with the exception of using OllamaFunctions instead of an OpenAI model (which I have tried, and it works without issue).

I have also tried changing the Ollama model, such as llama and phi3, but it still produces the same error.

EDIT: I forgot to mention that the issue typically arises when I direct the initial chat to route to the search_assistant and use one of its tools (online_query() in this case); that is when the error appears. However, if I use the initial primary assistant and its tool, it does not cause an error and is able to output normally.
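
As a possible mitigation (my own sketch, not something from the LangGraph tutorial): since OllamaFunctions surfaces this failure as a plain ValueError, the bound model can be wrapped with Runnable.with_retry() so the call gets a few more chances to produce parseable output before the graph node errors out. Variable names mirror the example code above; the retry parameters are illustrative.

# Workaround sketch (assumption: simply retrying the model call is acceptable here)
search_assistant_runnable = search_assistant_prompt | llm.bind_tools(
    search_assistant_tools + [CompleteOrEscalate]
).with_retry(
    retry_if_exception_type=(ValueError,),  # OllamaFunctions raises ValueError on parse failure
    stop_after_attempt=3,
)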

System Info

Platform: mac
Python version: 3.12.4

gbaian10 commented 3 days ago

I think this is a problem with the small LLM itself. It cannot be forced to call a tool, so it sometimes produces output that OllamaFunctions cannot parse correctly: the LLM occasionally emits incorrectly structured content, which causes the parsing failure.

I minified your code down to just OllamaFunctions and deliberately entered some odd input to reproduce this error as reliably as possible.

from langchain_core.pydantic_v1 import BaseModel
from langchain_experimental.llms.ollama_functions import OllamaFunctions

model = OllamaFunctions(model="phi3", format="json")

class CompleteOrEscalate(BaseModel):
    """A tool to mark the current task as completed and/or to escalate control of the dialog to the main assistant,
    who can re-route the dialog based on the user's needs."""

    cancel: bool = True
    reason: str

    class Config:
        schema_extra = {
            "example": {
                "cancel": True,
                "reason": "User changed their mind about the current task.",
            },
            "example 2": {
                "cancel": True,
                "reason": "I have fully completed the task.",
            },
            "example 3": {
                "cancel": False,
                "reason": "I need to have additional information from user to search.",
            },
        }

model = model.bind_tools(tools=[CompleteOrEscalate])
for _ in range(10):
    print(model.invoke("sftgstrew5t6436fgvhbfdat"))  # It doesn't always error, just run it until it does.

So I think this may not be directly related to the operation of LangGraph.
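
To make the failure mode concrete (this is my reading of langchain_experimental.llms.ollama_functions, so treat the exact keys as an assumption): OllamaFunctions instructs the model to reply with a JSON object naming a tool and its arguments, and it raises the ValueError whenever the JSON it gets back does not have that shape, which is exactly what happens in the traceback above.

# Shape OllamaFunctions asks the model to produce (per its default system template; version-dependent)
expected = {
    "tool": "duckduckgo_search",
    "tool_input": {"query": "New York State population"},
}

# What phi3 actually emitted in the traceback above: valid JSON, but without
# "tool" / "tool_input" keys, so parsing fails with "Failed to parse a response".
actual = {
    "Population of New York State": "As of July 1, 2021, the estimated population of "
    "New York state was approximately 20.21 million according to the United States Census Bureau."
}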

WenJett commented 3 days ago

@gbaian10 It seems like the LLM itself is the issue at the core, but I would have assumed Llama 70b to suffice for this kind of prompting. I did try instructing the model in the prompt template to follow a certain output format, which I assume defaults to something like 'content="" id=""' (not too sure about this). It is still quite reluctant to follow and produces different keys such as name_of_event, class, and grade, which were part of my query.

I attempted it with another tool and it produced this error message: ValueError: Failed to parse a response from phi3:14b-medium-4k-instruct-q8_0 output: { "name_of_event": "", "class": "", "grade": "" }

Even with Llama 70b, it produced a similar error message: ValueError: Failed to parse a response from llama3:70b-instruct output: { "name": "conversational_ai" }
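
For reference, the kind of prompt-level format reminder I mean looks roughly like the sketch below (illustrative wording only; note the doubled braces, since ChatPromptTemplate treats single braces as template variables):

from datetime import datetime

from langchain_core.prompts import ChatPromptTemplate

# Sketch: restate the expected tool-call JSON shape in the system prompt.
# This wording is an assumption and does not guarantee compliance from smaller models.
search_assistant_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a specialized assistant working to search. "
            "When you decide to call a tool, respond ONLY with a JSON object of the form "
            '{{"tool": "<tool name>", "tool_input": {{<arguments>}}}}. '
            "Do not wrap the final answer itself in JSON."
            "\nCurrent time: {time}",
        ),
        ("placeholder", "{messages}"),
    ]
).partial(time=datetime.now())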

vbarda commented 3 days ago

Closing, since the issue is caused by the underlying LLM.