JSONDecodeError: Invalid control character at: line 2 column 631 (char 632) when using langchain new agent

glejdis commented 9 months ago

Checked other resources

[X] I added a very descriptive title to this issue.
[X] I searched the LangChain documentation with the integrated search.
[X] I used the GitHub search to find a similar question and didn't find it.

Example Code

This code:

def parse(output):
    # If no function was invoked, return to user
    if "function_call" not in output.additional_kwargs:
        return AgentFinish(return_values={"output": output.content}, log=output.content)

    # Parse out the function call
    function_call = output.additional_kwargs["function_call"]
    name = function_call["name"]
    inputs = json.loads(function_call["arguments"])

    # If the Response function was invoked, return to the user with the function inputs
    if name == "Response":
        return AgentFinish(return_values=inputs, log=str(function_call))
    # Otherwise, return an agent action
    else:
        return AgentActionMessageLog(
            tool=name, tool_input=inputs, log="", message_log=[output]
        )

def creat_ai_search_new_agent(embeddings, llm, class_name_rich):

    ai_search_endpoint = get_ai_search_endpoint()
    ai_search_admin_key = get_ai_search_admin_key()

    vector_store = AzureSearch(
            azure_search_endpoint=ai_search_endpoint,
            azure_search_key=ai_search_admin_key,
            index_name=class_name_rich,
            embedding_function=embeddings.embed_query,
            content_key=content_key
        )

    """Retriever that uses `Azure Cognitive Search`."""

    azure_search_retriever = AzureSearchVectorStoreRetriever(
            vectorstore=vector_store,
            search_type=search_type, 
            k=k,
            top=n
        )

    retriever_tool = create_retriever_tool(
        azure_search_retriever,
        "Retriever",
        "Useful when you need to retrieve information from documents",
    )

    class Response(BaseModel):
        """Final response to the question being asked"""

        answer: str = Field(description="The final answer to respond to the user")
        sources: List[int] = Field(
            description="List of page chunks that contain answer to the question. Only include a page chunk if it contains relevant information"
        )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant who retrieves information from documents"),
            ("user", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
    )

    llm_with_tools = llm.bind(
        functions=[
            # The retriever tool
            format_tool_to_openai_function(retriever_tool),
            # Response schema
            convert_pydantic_to_openai_function(Response),
        ]
    )

    try:

        agent = (
            {
                "input": lambda x: x["input"],
                # Format agent scratchpad from intermediate steps
                "agent_scratchpad": lambda x: format_to_openai_function_messages(
                    x["intermediate_steps"]
                ),
            }
            | prompt
            | llm_with_tools
            | parse
        )

        agent_executor = AgentExecutor(tools=[retriever_tool], agent=agent, verbose=True, return_intermediate_steps=True)

    except Exception as e:
        print(e)
        print("error instanciating the agent")

    return agent_executor

Gives me the following error: JSONDecodeError Traceback (most recent call last) File , line 5 3 # get the start time 4 st = time.time() ----> 5 answer = agent_executor.invoke( 6 {"input": text}, 7 # return_only_outputs=True, 8
9 ) 10 # get the end time 11 et = time.time()

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain/chains/base.py:162, in Chain.invoke(self, input, config, **kwargs) 160 except BaseException as e: 161 run_manager.on_chain_error(e) --> 162 raise e 163 run_manager.on_chain_end(outputs) 164 final_outputs: Dict[str, Any] = self.prep_outputs( 165 inputs, outputs, return_only_outputs 166 )

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain/chains/base.py:156, in Chain.invoke(self, input, config, **kwargs) 149 run_manager = callback_manager.on_chain_start( 150 dumpd(self), 151 inputs, 152 name=run_name, 153 ) 154 try: 155 outputs = ( --> 156 self._call(inputs, run_manager=run_manager) 157 if new_arg_supported 158 else self._call(inputs) 159 ) 160 except BaseException as e: 161 run_manager.on_chain_error(e)

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain/agents/agent.py:1329, in AgentExecutor._call(self, inputs, run_manager) 1327 # We now enter the agent loop (until it returns something). 1328 while self._should_continue(iterations, time_elapsed): -> 1329 next_step_output = self._take_next_step( 1330 name_to_tool_map, 1331 color_mapping, 1332 inputs, 1333 intermediate_steps, 1334 run_manager=run_manager, 1335 ) 1336 if isinstance(next_step_output, AgentFinish): 1337 return self._return( 1338 next_step_output, intermediate_steps, run_manager=run_manager 1339 )

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain/agents/agent.py:1055, in AgentExecutor._take_next_step(self, name_to_tool_map, color_mapping, inputs, intermediate_steps, run_manager) 1046 def _take_next_step( 1047 self, 1048 name_to_tool_map: Dict[str, BaseTool], (...) 1052 run_manager: Optional[CallbackManagerForChainRun] = None, 1053 ) -> Union[AgentFinish, List[Tuple[AgentAction, str]]]: 1054 return self._consume_next_step( -> 1055 [ 1056 a 1057 for a in self._iter_next_step( 1058 name_to_tool_map, 1059 color_mapping, 1060 inputs, 1061 intermediate_steps, 1062 run_manager, 1063 ) 1064 ] 1065 )

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain/agents/agent.py:1055, in (.0) 1046 def _take_next_step( 1047 self, 1048 name_to_tool_map: Dict[str, BaseTool], (...) 1052 run_manager: Optional[CallbackManagerForChainRun] = None, 1053 ) -> Union[AgentFinish, List[Tuple[AgentAction, str]]]: 1054 return self._consume_next_step( -> 1055 [ 1056 a 1057 for a in self._iter_next_step( 1058 name_to_tool_map, 1059 color_mapping, 1060 inputs, 1061 intermediate_steps, 1062 run_manager, 1063 ) 1064 ] 1065 )

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain/agents/agent.py:1083, in AgentExecutor._iter_next_step(self, name_to_tool_map, color_mapping, inputs, intermediate_steps, run_manager) 1080 intermediate_steps = self._prepare_intermediate_steps(intermediate_steps) 1082 # Call the LLM to see what to do. -> 1083 output = self.agent.plan( 1084 intermediate_steps, 1085 callbacks=run_manager.get_child() if run_manager else None, 1086 **inputs, 1087 ) 1088 except OutputParserException as e: 1089 if isinstance(self.handle_parsing_errors, bool):

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain/agents/agent.py:386, in RunnableAgent.plan(self, intermediate_steps, callbacks, kwargs) 374 """Given input, decided what to do. 375 376 Args: (...) 383 Action specifying what tool to use. 384 """ 385 inputs = {kwargs, **{"intermediate_steps": intermediate_steps}} --> 386 output = self.runnable.invoke(inputs, config={"callbacks": callbacks}) 387 return output

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain_core/runnables/base.py:1774, in RunnableSequence.invoke(self, input, config) 1772 try: 1773 for i, step in enumerate(self.steps): -> 1774 input = step.invoke( 1775 input, 1776 # mark each step as a child run 1777 patch_config( 1778 config, callbacks=run_manager.get_child(f"seq:step:{i+1}") 1779 ), 1780 ) 1781 # finish the root run 1782 except BaseException as e:

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain_core/runnables/base.py:3074, in RunnableLambda.invoke(self, input, config, kwargs) 3072 """Invoke this runnable synchronously.""" 3073 if hasattr(self, "func"): -> 3074 return self._call_with_config( 3075 self._invoke, 3076 input, 3077 self._config(config, self.func), 3078 kwargs, 3079 ) 3080 else: 3081 raise TypeError( 3082 "Cannot invoke a coroutine function synchronously." 3083 "Use ainvoke instead." 3084 )

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain_core/runnables/base.py:975, in Runnable._call_with_config(self, func, input, config, run_type, kwargs) 971 context = copy_context() 972 context.run(var_child_runnable_config.set, child_config) 973 output = cast( 974 Output, --> 975 context.run( 976 call_func_with_variable_args, 977 func, # type: ignore[arg-type] 978 input, # type: ignore[arg-type] 979 config, 980 run_manager, 981 kwargs, 982 ), 983 ) 984 except BaseException as e: 985 run_manager.on_chain_error(e)

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain_core/runnables/config.py:323, in call_func_with_variable_args(func, input, config, run_manager, kwargs) 321 if run_manager is not None and accepts_run_manager(func): 322 kwargs["run_manager"] = run_manager --> 323 return func(input, kwargs)

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain_core/runnables/base.py:2950, in RunnableLambda._invoke(self, input, run_manager, config, kwargs) 2948 output = chunk 2949 else: -> 2950 output = call_func_with_variable_args( 2951 self.func, input, config, run_manager, kwargs 2952 ) 2953 # If the output is a runnable, invoke it 2954 if isinstance(output, Runnable):

File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/langchain_core/runnables/config.py:323, in call_func_with_variable_args(func, input, config, run_manager, kwargs) 321 if run_manager is not None and accepts_run_manager(func): 322 kwargs["run_manager"] = run_manager --> 323 return func(input, kwargs)

File , line 11, in parse(output) 9 function_call = output.additional_kwargs["function_call"] 10 name = function_call["name"] ---> 11 inputs = json.loads(function_call["arguments"]) 13 # If the Response function was invoked, return to the user with the function inputs 14 if name == "Response":

File /usr/lib/python3.10/json/init.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw) 341 s = s.decode(detect_encoding(s), 'surrogatepass') 343 if (cls is None and object_hook is None and 344 parse_int is None and parse_float is None and 345 parse_constant is None and object_pairs_hook is None and not kw): --> 346 return _default_decoder.decode(s) 347 if cls is None: 348 cls = JSONDecoder

File /usr/lib/python3.10/json/decoder.py:337, in JSONDecoder.decode(self, s, _w) 332 def decode(self, s, _w=WHITESPACE.match): 333 """Return the Python representation of s (a str instance 334 containing a JSON document). 335 336 """ --> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 338 end = _w(s, end).end() 339 if end != len(s):

File /usr/lib/python3.10/json/decoder.py:353, in JSONDecoder.raw_decode(self, s, idx) 344 """Decode a JSON document from s (a str beginning with 345 a JSON document) and return a 2-tuple of the Python 346 representation and the index in s where the document ended. (...) 350 351 """ 352 try: --> 353 obj, end = self.scan_once(s, idx) 354 except StopIteration as err: 355 raise JSONDecodeError("Expecting value", s, err.value) from None

JSONDecodeError: Invalid control character at: line 2 column 631 (char 632)

Can someone help?

Description

Error half of the times

System Info

langchain==0.1.0 openai==1.7.0

Related Components

[X] LLMs/Chat Models
[X] Embedding Models
[X] Prompts / Prompt Templates / Prompt Selectors
[X] Output Parsers
[ ] Document Loaders
[X] Vector Stores / Retrievers
[ ] Memory
[X] Agents / Agent Executors
[X] Tools / Toolkits
[ ] Chains
[ ] Callbacks/Tracing
[ ] Async

dosubot[bot] commented 9 months ago

🤖

Hello @glejdis! 👋

I'm Dosu, a friendly bot here to lend a hand with your LangChain issues and questions, while we wait for the human maintainers. I'll do my best to help you squash those pesky bugs, answer your burning questions, and even guide you on your path to becoming a contributor if you'd like. I'm here to make your coding journey a little smoother. So, let's get started, shall we? 🚀

Based on the error you're encountering and the context provided, it seems like the function_call["arguments"] string occasionally contains control characters, which are causing the json.loads() function to raise a JSONDecodeError. This is a known issue and has been discussed in similar contexts in the LangChain repository.

The solution to this problem is to sanitize the function_call["arguments"] string to remove any control characters before attempting to parse it as JSON. You can use Python's str.translate method with str.maketrans to achieve this. Here's how you can modify the parse function:

def parse(output):
    # ... existing code ...

    # Parse out the function call
    function_call = output.additional_kwargs["function_call"]
    name = function_call["name"]

    # Sanitize the arguments to remove control characters
    sanitized_arguments = function_call["arguments"].translate(str.maketrans('', '', ''.join(map(chr, range(32)))))
    inputs = json.loads(sanitized_arguments)

    # ... existing code ...

This modification will clean the arguments string from any control characters before attempting to parse it as JSON, which should resolve the issue you're facing with the custom parser.

This solution was suggested in the following issues in the LangChain repository:

Please try this solution and let me know if it resolves your issue. If you're still encountering the issue after applying this fix, please provide more details so I can further assist you.

Sources

This response is meant to be useful and save you time. It is not meant to be a precise solution, but rather a starting point for your own research.

Help me be more useful! Please leave a 👍 if this is helpful and 👎 if it is irrelevant.

If you want to continue the conversation, start your reply with @dosu-bot.

nfcampos commented 9 months ago

@glejdis hi, which llm are you using?

glejdis commented 9 months ago

@nfcampos

llm = AzureChatOpenAI( deployment_name = "gpt-4_32k", openai_api_version = "2023-07-01-preview", temperature = 0, openai_api_key=XXXXX, openai_api_type="azure", azure_endpoint=api_base )

Model used is gpt-4 32k.

I suspect it might be part of parsing. Is there some easier parser we can use with this model, since we are not customising the parser ourselves. If usually fails when the agent goes from one step to the next step.

mewh commented 9 months ago

I got this issue when receiving Japanese text from OpenAI. This seems to be a issue specific to the turbo models (gpt-4-1106-preview). See this post on the OpenAI forums. Changing to gpt-4 fixed the problem.

glejdis commented 9 months ago

@mewh

Thank you for your input! However, I am not using gpt-4-1106-preview. I am using gpt-4-0613. So I believe this is caused by the parsing function I am using and its compatibility with the langchain agent.

langchain-ai / langchain