haesleinhuepf / human-eval-bia

Benchmarking Large Language Models for Bio-Image Analysis Code Generation
MIT License
13 stars 4 forks source link

Mistral benchmarking on blablador currently fails #55

Open haesleinhuepf opened 2 months ago

haesleinhuepf commented 2 months ago

In this notebook, the sanity check fails when using Mistral via blablador (with `use_blablador_mistral = True` and all other model flags set to `False`):

---------------------------------------------------------------------------
BadRequestError                           Traceback (most recent call last)
Cell In[17], line 2
      1 for key, func in code_generators.items():
----> 2     print(key, func("def print_hello_world():\n"))

Cell In[12], line 10, in generate_one_completion_blablador_mistral(input_code)
      8 client.base_url = 'https://helmholtz-blablador.fz-juelich.de:8000/v1'
      9 client.api_key = os.environ.get('BLABLADOR_API_KEY')
---> 10 response = client.chat.completions.create(
     11     model=model_blablador_mistral,
     12     messages=[{"role": "user", "content": setup_prompt(input_code)}],
     13 )
     14 return response.choices[0].message.content.strip()

File ~\mambaforge\envs\heb\lib\site-packages\openai\_utils\_utils.py:303, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
    301             msg = f"Missing required argument: {quote(missing[0])}"
    302     raise TypeError(msg)
--> 303 return func(*args, **kwargs)

File [~\mambaforge\envs\heb\lib\site-packages\openai\resources\chat\completions.py:645](http://localhost:8888/lab/tree/demo/~/mambaforge/envs/heb/lib/site-packages/openai/resources/chat/completions.py#line=644), in Completions.create(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_tokens, n, presence_penalty, response_format, seed, stop, stream, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
    596 @required_args(["messages", "model"], ["messages", "model", "stream"])
    597 def create(
    598     self,
   (...)
    643     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    644 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
--> 645     return self._post(
    646         "/chat/completions",
    647         body=maybe_transform(
    648             {
    649                 "messages": messages,
    650                 "model": model,
    651                 "frequency_penalty": frequency_penalty,
    652                 "function_call": function_call,
    653                 "functions": functions,
    654                 "logit_bias": logit_bias,
    655                 "logprobs": logprobs,
    656                 "max_tokens": max_tokens,
    657                 "n": n,
    658                 "presence_penalty": presence_penalty,
    659                 "response_format": response_format,
    660                 "seed": seed,
    661                 "stop": stop,
    662                 "stream": stream,
    663                 "temperature": temperature,
    664                 "tool_choice": tool_choice,
    665                 "tools": tools,
    666                 "top_logprobs": top_logprobs,
    667                 "top_p": top_p,
    668                 "user": user,
    669             },
    670             completion_create_params.CompletionCreateParams,
    671         ),
    672         options=make_request_options(
    673             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    674         ),
    675         cast_to=ChatCompletion,
    676         stream=stream or False,
    677         stream_cls=Stream[ChatCompletionChunk],
    678     )

File [~\mambaforge\envs\heb\lib\site-packages\openai\_base_client.py:1088](http://localhost:8888/lab/tree/demo/~/mambaforge/envs/heb/lib/site-packages/openai/_base_client.py#line=1087), in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1074 def post(
   1075     self,
   1076     path: str,
   (...)
   1083     stream_cls: type[_StreamT] | None = None,
   1084 ) -> ResponseT | _StreamT:
   1085     opts = FinalRequestOptions.construct(
   1086         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
   1087     )
-> 1088     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File [~\mambaforge\envs\heb\lib\site-packages\openai\_base_client.py:853](http://localhost:8888/lab/tree/demo/~/mambaforge/envs/heb/lib/site-packages/openai/_base_client.py#line=852), in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    844 def request(
    845     self,
    846     cast_to: Type[ResponseT],
   (...)
    851     stream_cls: type[_StreamT] | None = None,
    852 ) -> ResponseT | _StreamT:
--> 853     return self._request(
    854         cast_to=cast_to,
    855         options=options,
    856         stream=stream,
    857         stream_cls=stream_cls,
    858         remaining_retries=remaining_retries,
    859     )

File [~\mambaforge\envs\heb\lib\site-packages\openai\_base_client.py:930](http://localhost:8888/lab/tree/demo/~/mambaforge/envs/heb/lib/site-packages/openai/_base_client.py#line=929), in SyncAPIClient._request(self, cast_to, options, remaining_retries, stream, stream_cls)
    927     if not err.response.is_closed:
    928         err.response.read()
--> 930     raise self._make_status_error_from_response(err.response) from None
    932 return self._process_response(
    933     cast_to=cast_to,
    934     options=options,
   (...)
    937     stream_cls=stream_cls,
    938 )

BadRequestError: Error code: 400 - {'object': 'error', 'message': 'Only 4 - CosmoSage answers your cosmology questions&&text-embedding-ada-002&&5 - GritLM-7B - For Text-Embeddings&&1 - Mistral-7B-Instruct-v0.2 - the best option in general - fast and good&&alias-embeddings&&3 - starcoder2-15b - A model for programming&&2 - Mixtral-8x7B-Instruct-v0.1 Slower with higher quality&&alias-code&&alias-large&&alias-fast&&text-davinci-003&&gpt-3.5-turbo allowed now, your model Mistral-7B-Instruct-v0.2', 'code': 40301}