langchain-ai / langchain

🦜🔗 Build context-aware reasoning applications
https://python.langchain.com
MIT License

Chat Agent doesn't always use bound tools for JsonOutputParser #19474

Closed Travis-Barton closed 2 months ago

Travis-Barton commented 5 months ago

Checked other resources

Example Code

If I use my fact checker:

import sys
import os

current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
sys.path.insert(0, parent_dir)
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from llm_utils import get_prompt, AllowedModels, load_chat_model, get_chat_prompt, load_llm_model
from typing import List, Dict
from fact_check.checker import FactChecker
from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
from langchain_core.output_parsers.openai_tools import JsonOutputToolsParser
from models import QuestionSet, Question, FactCheckQuestion, StandardObject
import asyncio

from langchain.globals import set_debug

set_debug(False)

def parse_results(result):

    fact_check = result[0][0]['args']  # Sometimes it doesn't use the freaking JSON parser

    return {
        'question': fact_check['question'],
        'answer': fact_check['answer'],
        'category': fact_check['category'],
        'explanation': fact_check['explanation'],
        'fact_check': fact_check['fact_check']
    }

async def gather_tasks(tasks):
    return await asyncio.gather(*tasks)

async def afact_checker(question: Question) -> FactCheckQuestion:
    """
    Uses an OpenAI model to generate a list of questions for each category.
    :param model: The model to use for question generation.
    :param categories: A list of categories to generate questions for.
    :return:
    """
    fact_check = FactChecker(question.question, question.answer)
    response = fact_check._get_answer()

    model = AllowedModels('gpt-4')
    prompt = get_chat_prompt('fact_checking')
    llm = load_chat_model(model)
    llm = llm.bind_tools([FactCheckQuestion])
    parser = JsonOutputToolsParser()
    chain = prompt['prompt'] | llm | parser  # now use the model to assert whether the answer is correct, given the perplexity findings

    actively_grading = []
    task = chain.ainvoke({
        'question': question.question,
        'answer': question.answer,
        'category': question.category,
        'findings': response,
    })
    actively_grading.append(task)
    results = await asyncio.gather(*actively_grading)

    parsed_results = parse_results(results)

    return FactCheckQuestion(**parsed_results)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    result = loop.run_until_complete(afact_checker(Question(question="What is the capital of Nigeria?",
                                                            answer="Abuja",
                                                            category="Geography",
                                                            difficulty="hard")))
    loop.close()
    print(result)

It returns an error 50% of the time because the ChatModel sometimes uses the bound tool and sometimes doesn't. I'm afraid I can't share my prompt, but it's a pretty simple system and user prompt that makes no mention of how the output should be structured.

Here are two examples of returns from the same code:

# using the bound tool
{
  "generations": [
    [
      {
        "text": "",
        "generation_info": {
          "finish_reason": "tool_calls",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "",
            "additional_kwargs": {
              "tool_calls": [
                {
                  "id": "call_FetSvBCClds7wRDu7oEpfOD3",
                  "function": {
                    "arguments": "{\n\"question\": \"What is the capital of Nigeria?\",\n\"answer\": \"Abuja\",\n\"category\": \"Geography\",\n\"fact_check\": true,\n\"explanation\": \"correct\"\n}",
                    "name": "FactCheckQuestion"
                  },
                  "type": "function"
                }
              ]
            }
          }
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "completion_tokens": 48,
      "prompt_tokens": 311,
      "total_tokens": 359
    },
    "model_name": "gpt-4",
    "system_fingerprint": null
  },
  "run": null
}

# forgoing the bound tool
{
  "generations": [
    [
      {
        "text": "{ \"question\": \"What is the capital of Nigeria?\", \"answer\": \"Abuja\", \"category\": \"Geography\", \"fact_check\": true, \"explanation\": \"correct\" }",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "{ \"question\": \"What is the capital of Nigeria?\", \"answer\": \"Abuja\", \"category\": \"Geography\", \"fact_check\": true, \"explanation\": \"correct\" }",
            "additional_kwargs": {}
          }
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "completion_tokens": 41,
      "prompt_tokens": 311,
      "total_tokens": 352
    },
    "model_name": "gpt-4",
    "system_fingerprint": null
  },
  "run": null
}

There is no difference between these two runs; I simply invoked the same chain twice.

Is there a way to force the ChatModel to use the bound tool?
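
One mitigation worth trying (a sketch only, not verified against the package versions listed below) is to pass OpenAI's tool_choice through when binding the tool, which asks the API to always call the named function:

from langchain_core.utils.function_calling import convert_to_openai_tool

# Sketch: pin tool_choice to FactCheckQuestion so the API is asked to call
# that function on every generation instead of answering in free text.
llm = load_chat_model(model)
llm = llm.bind(
    tools=[convert_to_openai_tool(FactCheckQuestion)],
    tool_choice={"type": "function", "function": {"name": "FactCheckQuestion"}},
)
chain = prompt['prompt'] | llm | JsonOutputToolsParser()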

Error Message and Stack Trace (if applicable)

No response

Description

If I call the invoke function twice on a ChatModel with a Pydantic tool bound, it alternates between using the tool to return a JSON object and returning raw text.
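
Newer langchain_openai releases also expose with_structured_output, which binds the schema as a tool and parses the response straight into the Pydantic model. A minimal sketch, assuming an upgraded langchain_openai and the same helpers as above:

# Sketch: assumes a langchain_openai version that provides with_structured_output
structured_llm = load_chat_model(model).with_structured_output(FactCheckQuestion)
chain = prompt['prompt'] | structured_llm  # yields a FactCheckQuestion instance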

System Info

System Information

OS: Darwin
OS Version: Darwin Kernel Version 23.2.0: Wed Nov 15 21:55:06 PST 2023; root:xnu-10002.61.3~2/RELEASE_ARM64_T6020
Python Version: 3.11.7 (main, Dec 15 2023, 12:09:04) [Clang 14.0.6 ]

Package Information

langchain_core: 0.1.31
langchain: 0.1.12
langchain_community: 0.0.28
langsmith: 0.1.25
langchain_anthropic: 0.1.4
langchain_openai: 0.0.8
langchain_text_splitters: 0.0.1

Packages not installed (Not Necessarily a Problem)

The following packages were not found:

langgraph
langserve

Travis-Barton commented 5 months ago

My solution was to make my own parser:

import json
import logging

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.output_parsers.openai_tools import JsonOutputToolsParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

def pydantic_parser(output, pydantic_object):
    # First try the tool-call path: if the model actually called the bound tool,
    # JsonOutputToolsParser returns its arguments.
    parser_json = JsonOutputToolsParser()
    json_output = parser_json.parse_result(output)
    if json_output:
        return pydantic_object(**json_output[0]['args'])
    else:
        logging.warning("Could not parse output with JsonOutputToolsParser. Trying simple parser.")

    # No tool call: fall back to treating the raw text as JSON.
    output = output[0]
    output = output.dict()
    try:
        return pydantic_object(**json.loads(output.get('text')))
    except json.JSONDecodeError:
        logging.warning(f"Could not parse output {output} with simple JSON parser. Trying trimmed parser.")

    # Trim everything after the first closing brace and try again.
    try:
        trimmed_result = output.get('text').split('}')[0] + '}'
        return pydantic_object(**json.loads(trimmed_result))
    except json.JSONDecodeError:
        logging.error(f"Could not parse output: {trimmed_result} with trimmed JSON parser. Trying an LLM solution.")

    # If all else fails, make another LLM call to fix the formatting.
    try:
        return simple_pydantic_parser(output.get('text'), pydantic_object)
    except Exception as e:
        raise Exception(f"Could not parse output: {output.get('text')} with any parser. Error: {e}")

def simple_pydantic_parser(output, pydantic_object):
    model = ChatOpenAI(temperature=0)
    parser = PydanticOutputParser(pydantic_object=pydantic_object)
    prompt = PromptTemplate(
        template="You are a reformat tool. Your job is to fix wrongly formatted documents using your tool. "
                 "reformat this query into the proper format.\n{format_instructions}\n{query}\n",
        input_variables=["query"],
        partial_variables={
            "format_instructions": parser.get_format_instructions()},
    )
    chain = prompt | model | parser

    return chain.invoke({"query": output})
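
For reference, a quick usage sketch of the fallback path, feeding pydantic_parser the raw-text style of response shown above (FactCheckQuestion being the Pydantic model from the issue):

from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration

# Hypothetical input: the "forgoing the bound tool" case, where the model
# answered with plain JSON text instead of making a tool call.
raw = AIMessage(content='{ "question": "What is the capital of Nigeria?", "answer": "Abuja", '
                        '"category": "Geography", "fact_check": true, "explanation": "correct" }')
parsed = pydantic_parser([ChatGeneration(message=raw)], FactCheckQuestion)
print(parsed)  # FactCheckQuestion built from the raw text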