langchain-ai / langchain

🦜🔗 Build context-aware reasoning applications
https://python.langchain.com

PermissionDeniedError: Streaming is not allowed. Set value: "stream":false #22205

Open spsingh559 opened 3 months ago

spsingh559 commented 3 months ago


### Example Code

```python
# Build a ReAct agent as a replacement for the CONVERSATIONAL_REACT_DESCRIPTION
# initialize_agent. llm is the AzureChatOpenAI instance shown in the Description;
# tools, prompt, and json_parser are defined elsewhere.
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent

react_agent = create_react_agent(llm, tools, prompt, output_parser=json_parser)

# Create an agent executor by passing in the agent and tools
agent_executor = AgentExecutor(agent=react_agent, tools=tools, verbose=True)

agent_executor.invoke(
    {
        "input": "what's my name? Only use a tool if needed, otherwise respond with Final Answer",
        # Notice that chat_history is a string, since this prompt is aimed at LLMs, not chat models
        "chat_history": "Human: Hi! My name is Bob\nAI: Hello Bob! Nice to meet you",
    }
)
```
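
For context, here is a minimal sketch of the pieces the snippet assumes but does not show. Everything below is a placeholder and not part of the original report: the tool, the deployment name, and the stand-in parser are assumptions, and `hwchase17/react-chat` is chosen only because the invoke call passes a `chat_history` string.

```python
# Hypothetical setup (placeholders, not the reporter's actual code).
from langchain import hub  # pulling prompts requires the langchainhub package
from langchain_core.tools import tool
from langchain_openai import AzureChatOpenAI
from langchain.agents.output_parsers import ReActSingleInputOutputParser

@tool
def get_word_length(word: str) -> int:
    """Return the number of characters in a word."""
    return len(word)

tools = [get_word_length]

# Public hub prompt whose template includes a chat_history placeholder.
prompt = hub.pull("hwchase17/react-chat")

# Endpoint and API key are read from AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_API_KEY;
# the deployment name below is a placeholder.
llm = AzureChatOpenAI(azure_deployment="my-deployment", openai_api_version="2024-02-01")

# Stand-in for the reporter's custom json_parser (the default ReAct parser).
json_parser = ReActSingleInputOutputParser()
```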

### Error Message and Stack Trace (if applicable)

```
Entering new AgentExecutor chain...
---------------------------------------------------------------------------
PermissionDeniedError                     Traceback (most recent call last)
Cell In[60], line 3
      1 from langchain_core.messages import AIMessage, HumanMessage
----> 3 agent_executor.invoke(
      4     {
      5         "input": "what's my name? Only use a tool if needed, otherwise respond with Final Answer",
      6         # Notice that chat_history is a string, since this prompt is aimed at LLMs, not chat models
      7         "chat_history": "Human: Hi! My name is Bob\nAI: Hello Bob! Nice to meet you",
      8     }
      9 )

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain/chains/base.py:166, in Chain.invoke(self, input, config, **kwargs)
    164 except BaseException as e:
    165     run_manager.on_chain_error(e)
--> 166     raise e
    167 run_manager.on_chain_end(outputs)
    169 if include_run_info:

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain/chains/base.py:156, in Chain.invoke(self, input, config, **kwargs)
    153 try:
    154     self._validate_inputs(inputs)
    155     outputs = (
--> 156         self._call(inputs, run_manager=run_manager)
    157         if new_arg_supported
    158         else self._call(inputs)
    159     )
    161     final_outputs: Dict[str, Any] = self.prep_outputs(
    162         inputs, outputs, return_only_outputs
    163     )
    164 except BaseException as e:

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain/agents/agent.py:1433, in AgentExecutor._call(self, inputs, run_manager)
   1431 # We now enter the agent loop (until it returns something).
   1432 while self._should_continue(iterations, time_elapsed):
-> 1433     next_step_output = self._take_next_step(
   1434         name_to_tool_map,
   1435         color_mapping,
   1436         inputs,
   1437         intermediate_steps,
   1438         run_manager=run_manager,
   1439     )
   1440     if isinstance(next_step_output, AgentFinish):
   1441         return self._return(
   1442             next_step_output, intermediate_steps, run_manager=run_manager
   1443         )

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain/agents/agent.py:1139, in AgentExecutor._take_next_step(self, name_to_tool_map, color_mapping, inputs, intermediate_steps, run_manager)
   1130 def _take_next_step(
   1131     self,
   1132     name_to_tool_map: Dict[str, BaseTool],
   (...)
   1136     run_manager: Optional[CallbackManagerForChainRun] = None,
   1137 ) -> Union[AgentFinish, List[Tuple[AgentAction, str]]]:
   1138     return self._consume_next_step(
-> 1139         [
   1140             a
   1141             for a in self._iter_next_step(
   1142                 name_to_tool_map,
   1143                 color_mapping,
   1144                 inputs,
   1145                 intermediate_steps,
   1146                 run_manager,
   1147             )
   1148         ]
   1149     )

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain/agents/agent.py:1167, in AgentExecutor._iter_next_step(self, name_to_tool_map, color_mapping, inputs, intermediate_steps, run_manager)
   1164     intermediate_steps = self._prepare_intermediate_steps(intermediate_steps)
   1166     # Call the LLM to see what to do.
-> 1167     output = self.agent.plan(
   1168         intermediate_steps,
   1169         callbacks=run_manager.get_child() if run_manager else None,
   1170         **inputs,
   1171     )
   1172 except OutputParserException as e:
   1173     if isinstance(self.handle_parsing_errors, bool):

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain/agents/agent.py:398, in RunnableAgent.plan(self, intermediate_steps, callbacks, **kwargs)
    390 final_output: Any = None
    391 if self.stream_runnable:
    392     # Use streaming to make sure that the underlying LLM is invoked in a
    393     # streaming
   (...)
    396     # Because the response from the plan is not a generator, we need to
    397     # accumulate the output into final output and return that.
--> 398     for chunk in self.runnable.stream(inputs, config={"callbacks": callbacks}):
    399         if final_output is None:
    400             final_output = chunk

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/runnables/base.py:2769, in RunnableSequence.stream(self, input, config, **kwargs)
   2763 def stream(
   2764     self,
   2765     input: Input,
   2766     config: Optional[RunnableConfig] = None,
   2767     **kwargs: Optional[Any],
   2768 ) -> Iterator[Output]:
-> 2769     yield from self.transform(iter([input]), config, **kwargs)

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/runnables/base.py:2756, in RunnableSequence.transform(self, input, config, **kwargs)
   2750 def transform(
   2751     self,
   2752     input: Iterator[Input],
   2753     config: Optional[RunnableConfig] = None,
   2754     **kwargs: Optional[Any],
   2755 ) -> Iterator[Output]:
-> 2756     yield from self._transform_stream_with_config(
   2757         input,
   2758         self._transform,
   2759         patch_config(config, run_name=(config or {}).get("run_name") or self.name),
   2760         **kwargs,
   2761     )

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/runnables/base.py:1772, in Runnable._transform_stream_with_config(self, input, transformer, config, run_type, **kwargs)
   1770 try:
   1771     while True:
-> 1772         chunk: Output = context.run(next, iterator)  # type: ignore
   1773         yield chunk
   1774         if final_output_supported:

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/runnables/base.py:2720, in RunnableSequence._transform(self, input, run_manager, config)
   2711 for step in steps:
   2712     final_pipeline = step.transform(
   2713         final_pipeline,
   2714         patch_config(
   (...)
   2717         ),
   2718     )
-> 2720 for output in final_pipeline:
   2721     yield output

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/output_parsers/transform.py:50, in BaseTransformOutputParser.transform(self, input, config, **kwargs)
     44 def transform(
     45     self,
     46     input: Iterator[Union[str, BaseMessage]],
     47     config: Optional[RunnableConfig] = None,
     48     **kwargs: Any,
     49 ) -> Iterator[T]:
---> 50     yield from self._transform_stream_with_config(
     51         input, self._transform, config, run_type="parser"
     52     )

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/runnables/base.py:1736, in Runnable._transform_stream_with_config(self, input, transformer, config, run_type, **kwargs)
   1734 input_for_tracing, input_for_transform = tee(input, 2)
   1735 # Start the input iterator to ensure the input runnable starts before this one
-> 1736 final_input: Optional[Input] = next(input_for_tracing, None)
   1737 final_input_supported = True
   1738 final_output: Optional[Output] = None

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/runnables/base.py:4638, in RunnableBindingBase.transform(self, input, config, **kwargs)
   4632 def transform(
   4633     self,
   4634     input: Iterator[Input],
   4635     config: Optional[RunnableConfig] = None,
   4636     **kwargs: Any,
   4637 ) -> Iterator[Output]:
-> 4638     yield from self.bound.transform(
   4639         input,
   4640         self._merge_configs(config),
   4641         **{**self.kwargs, **kwargs},
   4642     )

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/runnables/base.py:1166, in Runnable.transform(self, input, config, **kwargs)
   1163             final = ichunk
   1165 if got_first_val:
-> 1166     yield from self.stream(final, config, **kwargs)

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py:265, in BaseChatModel.stream(self, input, config, stop, **kwargs)
    258 except BaseException as e:
    259     run_manager.on_llm_error(
    260         e,
    261         response=LLMResult(
    262             generations=[[generation]] if generation else []
    263         ),
    264     )
--> 265     raise e
    266 else:
    267     run_manager.on_llm_end(LLMResult(generations=[[generation]]))

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_core/language_models/chat_models.py:245, in BaseChatModel.stream(self, input, config, stop, **kwargs)
    243 generation: Optional[ChatGenerationChunk] = None
    244 try:
--> 245     for chunk in self._stream(messages, stop=stop, **kwargs):
    246         if chunk.message.id is None:
    247             chunk.message.id = f"run-{run_manager.run_id}"

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/langchain_openai/chat_models/base.py:480, in BaseChatOpenAI._stream(self, messages, stop, run_manager, **kwargs)
    477 params = {**params, **kwargs, "stream": True}
    479 default_chunk_class = AIMessageChunk
--> 480 with self.client.create(messages=message_dicts, **params) as response:
    481     for chunk in response:
    482         if not isinstance(chunk, dict):

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/openai/_utils/_utils.py:277, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
    275             msg = f"Missing required argument: {quote(missing[0])}"
    276     raise TypeError(msg)
--> 277 return func(*args, **kwargs)

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/openai/resources/chat/completions.py:590, in Completions.create(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_tokens, n, presence_penalty, response_format, seed, stop, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
    558 @required_args(["messages", "model"], ["messages", "model", "stream"])
    559 def create(
    560     self,
   (...)
    588     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    589 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
--> 590     return self._post(
    591         "/chat/completions",
    592         body=maybe_transform(
    593             {
    594                 "messages": messages,
    595                 "model": model,
    596                 "frequency_penalty": frequency_penalty,
    597                 "function_call": function_call,
    598                 "functions": functions,
    599                 "logit_bias": logit_bias,
    600                 "logprobs": logprobs,
    601                 "max_tokens": max_tokens,
    602                 "n": n,
    603                 "presence_penalty": presence_penalty,
    604                 "response_format": response_format,
    605                 "seed": seed,
    606                 "stop": stop,
    607                 "stream": stream,
    608                 "stream_options": stream_options,
    609                 "temperature": temperature,
    610                 "tool_choice": tool_choice,
    611                 "tools": tools,
    612                 "top_logprobs": top_logprobs,
    613                 "top_p": top_p,
    614                 "user": user,
    615             },
    616             completion_create_params.CompletionCreateParams,
    617         ),
    618         options=make_request_options(
    619             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    620         ),
    621         cast_to=ChatCompletion,
    622         stream=stream or False,
    623         stream_cls=Stream[ChatCompletionChunk],
    624     )

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/openai/_base_client.py:1240, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1226 def post(
   1227     self,
   1228     path: str,
   (...)
   1235     stream_cls: type[_StreamT] | None = None,
   1236 ) -> ResponseT | _StreamT:
   1237     opts = FinalRequestOptions.construct(
   1238         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
   1239     )
-> 1240     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/openai/_base_client.py:921, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    912 def request(
    913     self,
    914     cast_to: Type[ResponseT],
   (...)
    919     stream_cls: type[_StreamT] | None = None,
    920 ) -> ResponseT | _StreamT:
--> 921     return self._request(
    922         cast_to=cast_to,
    923         options=options,
    924         stream=stream,
    925         stream_cls=stream_cls,
    926         remaining_retries=remaining_retries,
    927     )

File ~/workspace/projects/AI/GEN-AI/lang-chain-project/venv/lib/python3.12/site-packages/openai/_base_client.py:1020, in SyncAPIClient._request(self, cast_to, options, remaining_retries, stream, stream_cls)
   1017         err.response.read()
   1019     log.debug("Re-raising status error")
-> 1020     raise self._make_status_error_from_response(err.response) from None
   1022 return self._process_response(
   1023     cast_to=cast_to,
   1024     options=options,
   (...)
   1027     stream_cls=stream_cls,
   1028 )

PermissionDeniedError: {"status":403,"title":"Forbidden","detail":"Streaming is not allowed. Set value: "stream":false"}
```

### Description

I am trying to use create_react_agent as an alternative to initialize_agent, and I get this error when invoking the agent through the agent executor.

I have also set stream to False on AzureChatOpenAI, but I keep getting the error:

```python
llm = AzureChatOpenAI(
    api_key=OPENAI_KEY,
    azure_endpoint=OPENAI_URL,
    openai_api_version=openai_api_version,  # type: ignore
    azure_deployment=azure_deployment,
    temperature=0.5,
    verbose=True,
    model_kwargs={"stream": False},  # {"top_p": 0.1}
)
```
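
The traceback suggests why `model_kwargs={"stream": False}` has no effect here: `RunnableAgent.plan` streams the underlying runnable whenever `stream_runnable` is true, and `BaseChatOpenAI._stream` then forces the flag with `params = {**params, **kwargs, "stream": True}`, overriding anything set in `model_kwargs`. One possible workaround is to disable agent-level streaming so `plan` never calls `stream()` at all. This is a sketch based on the classes visible in the traceback, not verified against this exact Azure setup:

```python
# Sketch: wrap the runnable agent with stream_runnable=False so plan() uses
# invoke() instead of stream() on the underlying runnable.
from langchain.agents.agent import RunnableAgent

agent = RunnableAgent(runnable=react_agent, stream_runnable=False)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
```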


### System Info

langchain==0.2.0
langchain-community==0.2.0
langchain-core==0.2.1
langchain-openai==0.1.7
openai==1.30.1

Python version: 3.11
Platform: Mac
vasiljevic commented 1 month ago

I have also seen ChatOpenAI (and consequently AzureChatOpenAI) ignore `model_kwargs={"stream": False}` and still use streaming under the hood when calling the OpenAI API.

The solution in my scenario was to turn off the `parallel_tool_calls` option with:

```python
llm = llm.bind(parallel_tool_calls=False)
```

It looks like the `parallel_tool_calls` option requires the stream option at the OpenAI API layer, and this (and possibly other) option dependencies can override your intention to turn streaming off.
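
Note that `bind()` returns a new runnable rather than modifying `llm` in place, so it is the re-bound model that has to be used downstream. A minimal sketch, assuming a tool-calling setup like the one described above (the tools and message below are placeholders):

```python
from langchain_core.messages import HumanMessage

# bind_tools attaches the tools; the extra bind() merges parallel_tool_calls=False
# into every request made through this runnable. The original llm is unchanged.
llm_with_tools = llm.bind_tools(tools).bind(parallel_tool_calls=False)
result = llm_with_tools.invoke([HumanMessage(content="Hi! My name is Bob")])
```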