"""Reproduction script: `text_generation` against a model NOT served via TGI.

Calling `InferenceClient.text_generation` on a non-TGI model (e.g.
google/flan-t5-large) makes the plain Inference API endpoint reject the
request with HTTP 400, because unsupported kwargs such as
`return_full_text` are included in the payload; the client then keeps
retrying, so the call never returns.
"""
from huggingface_hub import InferenceClient

repo_id = "google/flan-t5-large"  # a model not served via TGI

llm_client = InferenceClient(
    model=repo_id,
    timeout=120,  # seconds before the underlying HTTP request times out
)

# Expected: a short completion string. Observed: repeated 400 BadRequestError
# responses (see logs below) until manually interrupted.
response = llm_client.text_generation(prompt="How are you today?", max_new_tokens=20)
print(response)
Logs
/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py:1539: UserWarning: API endpoint/model for text-generation is not served via TGI. Ignoring parameters ['watermark', 'stop', 'details', 'decoder_input_details'].
warnings.warn(
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: 41KOne7yQysab9S10EsGR)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text', 'stop', 'details', 'decoder_input_details', 'watermark'] (note: typos in the generate arguments will also show up in this list)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: U549qDzut6BJ841udS3Zj)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: QeWi_jindnw2dYKX6UC6O)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: ax-No2vHNnYPl4HyAVEXR)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: 2EmppJDF3ii4T4ETboPtJ)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: YoHvLX-0an-RefRKT1DyY)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: OTnJwskna5RCyrdvIJtE4)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: nn0y1OdeaKj3u6yKqM9LE)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
response.raise_for_status()
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/requests/models.py", line 1021, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://api-inference.huggingface.co/models/google/flan-t5-large
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 1559, in text_generation
bytes_output = self.post(json=payload, model=model, task="text-generation", stream=stream) # type: ignore
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/inference/_client.py", line 243, in post
hf_raise_for_status(response)
File "/home/anakin87/apps/haystack/venv/lib/python3.8/site-packages/huggingface_hub/utils/_errors.py", line 358, in hf_raise_for_status
raise BadRequestError(message, response=response) from e
huggingface_hub.utils._errors.BadRequestError: (Request ID: Ucf2U2y1fVGbgjpJQEOyd)
Bad request:
The following `model_kwargs` are not used by the model: ['return_full_text'] (note: typos in the generate arguments will also show up in this list)
... (manually stopped)
Describe the bug
When using some models not served via TGI (e.g., google/flan-t5-large), the generation hangs indefinitely. (Related: https://github.com/deepset-ai/haystack/issues/6816)
Reproduction
Logs
System info