langchain-ai / langchain

🦜🔗 Build context-aware reasoning applications
https://python.langchain.com
MIT License

Structured Output with Groq: Error code: 400 - {'error': {'message': 'response_format` does not support streaming', 'type': 'invalid_request_error'}} #23629

Open weissenbacherpwc opened 1 week ago

weissenbacherpwc commented 1 week ago


Example Code

    from dotenv import dotenv_values, load_dotenv
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.pydantic_v1 import BaseModel, Field
    from langchain_groq import ChatGroq

    class GradeDocuments(BaseModel):
        score: str = Field(
            description="Whether the user's question is a small-talk topic, 'True' or 'False'"
        )

    def question_classifier(state: AgentState):
        question = state["question"]
        print(f"In question classifier with question: {question}")
        system = """<s>[INST] You assess whether the user's question is a small-talk topic or not. \n
            If the question is a general small-talk question such as 'Hello, who are you?', grade it as 'True'. \n
            If the question is a specific question about a topic, for example 'Name some advantages of multi cloud', grade it as 'False'.[/INST]"""

        grade_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system),
                (
                    "human",
                    "User question: {question}",
                ),
            ]
        )

        # llm = ChatOpenAI()  # with ChatOpenAI it works, with ChatGroq it no longer does
        env_vars = dotenv_values('.env')
        load_dotenv()
        groq_key = env_vars.get("GROQ_API_KEY")
        print("Loading Structured Groq.")
        llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=groq_key)
        structured_llm = llm.with_structured_output(GradeDocuments)
        grader_llm = grade_prompt | structured_llm
        result = grader_llm.invoke({"question": question})
        state["is_smalltalk"] = result.score
        return state
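
For context, this is roughly how I exercise the node on its own (simplified; treating AgentState as a plain TypedDict here is just a stand-in for my real graph state):

    from typing import TypedDict

    class AgentState(TypedDict, total=False):
        question: str
        is_smalltalk: str

    # Calling the node directly, outside of LangGraph.
    # With ChatOpenAI this returns the updated state; with ChatGroq it raises the 400 error below.
    new_state = question_classifier({"question": "Hallo, wer bist du?"})
    print(new_state["is_smalltalk"])  # 'True' or 'False'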

Error Message and Stack Trace (if applicable)

The error occurs when calling grader_llm.invoke:

Error code: 400 - {'error': {'message': 'response_format` does not support streaming', 'type': 'invalid_request_error'}}

Description

Hi,

I want to use a Groq LLM to get structured output; in my case it should return True or False. The code works fine with ChatOpenAI(), but it fails with Groq, even though structured output should be supported according to the documentation.

I also tried structured_llm = llm.with_structured_output(GradeDocuments, method="json_mode"), without success, and I have already updated my langchain-groq version.
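
Spelled out, the json_mode attempt is identical to the snippet above except for the with_structured_output call (a sketch only, same prompt and schema):

    llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=groq_key)
    structured_llm = llm.with_structured_output(GradeDocuments, method="json_mode")
    grader_llm = grade_prompt | structured_llm
    result = grader_llm.invoke({"question": "Hallo, wer bist du?"})  # fails with the same 400 error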

Does anyone have an idea how to solve this?

EDIT: I also tried a simpler example, which works with ChatOpenAI but not with ChatGroq:

from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq

class GradeDocuments(BaseModel):
    """Boolean values to check for relevance on retrieved documents."""

    score: str = Field(
        description="Whether the user's question is a small-talk topic, 'True' or 'False'"
    )

model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)  # with this it works
#model = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key="")
structured_llm = model.with_structured_output(GradeDocuments)
structured_llm.invoke("Hello, how are you?")
# Returns: GradeDocuments(score='False')

With ChatGroq I get the same error as above. But if I use e.g. the Llama 3 model from Groq, it works, so it seems to be an issue with the Mixtral 8x7B model.
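
For comparison, the working Groq variant only swaps the model id (the exact id is from memory, so treat llama3-8b-8192 as an assumption; schema and call are unchanged):

    # model id assumed; one of Groq's Llama 3 models
    model = ChatGroq(model_name="llama3-8b-8192", groq_api_key="")
    structured_llm = model.with_structured_output(GradeDocuments)
    structured_llm.invoke("Hello, how are you?")
    # With the Llama 3 model this returns a GradeDocuments instance instead of the 400 error.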

System Info

langchain-groq 0.1.5
langchain 0.2.5

keenborder786 commented 1 week ago

Can you please post the traceback?

weissenbacherpwc commented 6 days ago

> Can you please post the traceback?


NotFoundError                             Traceback (most recent call last)
/Users/mweissenba001/Documents/GitHub/fastapi_rag_demo/test.ipynb Zelle 14 line 23
     21 #model = build_llm("modelle/sauerkrautlm-mixtral-8x7b-instruct.Q5_0.gguf", groq_llm=True, groq_stream=False)
     22 structured_llm = model.with_structured_output(GradeDocuments)
---> 23 structured_llm.invoke("Hallo, wer bist du?")

File ~/anaconda3/lib/python3.11/site-packages/langchain_core/runnables/base.py:2505, in RunnableSequence.invoke(self, input, config, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/langchain_core/runnables/base.py:4588, in RunnableBindingBase.invoke(self, input, config, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:248, in BaseChatModel.invoke(self, input, config, stop, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:677, in BaseChatModel.generate_prompt(self, prompts, stop, callbacks, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:534, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:524, in BaseChatModel.generate(self, messages, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:749, in BaseChatModel._generate_with_cache(self, messages, stop, run_manager, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/langchain_groq/chat_models.py:250, in ChatGroq._generate(self, messages, stop, run_manager, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/groq/resources/chat/completions.py:175, in Completions.create(self, messages, model, frequency_penalty, logit_bias, logprobs, max_tokens, n, presence_penalty, response_format, seed, stop, stream, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
File ~/anaconda3/lib/python3.11/site-packages/groq/_base_client.py:1189, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
File ~/anaconda3/lib/python3.11/site-packages/groq/_base_client.py:891, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
File ~/anaconda3/lib/python3.11/site-packages/groq/_base_client.py:982, in SyncAPIClient._request(self, cast_to, options, remaining_retries, stream, stream_cls)

NotFoundError: Error code: 404 - {'error': {'message': 'The model llama3-8b-81928 does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'code': 'model_not_found'}}

keenborder786 commented 5 days ago

@weissenbacherpwc thanks I will have a look.