Thank you for your contribution. I tried to re-implement your code, but it has an error. Could you help me address it? Thank you.
My code is as follows:
"""
from transformers import pipeline
transcriber = pipeline("automatic-speech-recognition", model="vinai/PhoWhisper-large", device="cuda") #small, medium, large
output = transcriber("/mnt/work1/phat/CODE/whisper/data/Hop_tac_qt.mp3")
print(output['text'])
"""
My error:
"""
(whisper) edward@edward-All-Series:/mnt/work1/phat/CODE/whisper$ python phoWhisper.py
Error: unable to open display
Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English. This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass language='en'.
Traceback (most recent call last):
File "/mnt/work1/phat/CODE/whisper/phoWhisper.py", line 3, in <module>
output = transcriber("/mnt/work1/phat/CODE/whisper/data/Hop_tac_qt.mp3")
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/automatic_speech_recognition.py", line 285, in __call__
return super().__call__(inputs, **kwargs)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/base.py", line 1235, in __call__
return next(
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/pt_utils.py", line 124, in __next__
item = next(self.iterator)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/pt_utils.py", line 269, in __next__
processed = self.infer(next(self.iterator), **self.params)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/base.py", line 1150, in forward
model_outputs = self._forward(model_inputs, **forward_params)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/automatic_speech_recognition.py", line 508, in _forward
tokens = self.model.generate(
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/models/whisper/generation_whisper.py", line 578, in generate
outputs = super().generate(
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 28, in decorate_context
return func(*args, **kwargs)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/generation/utils.py", line 1758, in generate
result = self._sample(
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/generation/utils.py", line 2410, in _sample
next_token_scores = logits_processor(input_ids, next_token_logits)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/generation/logits_process.py", line 98, in __call__
scores = processor(input_ids, scores)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/generation/logits_process.py", line 1787, in __call__
scores_processed = torch.where(suppress_token_mask, -float("inf"), scores)
RuntimeError: expected scalar type double but found float
"""
Thank you for your contribution. I tried to re-implement your code, but it has an error. Could you help me address it? Thank you.
My code is as follows: """ from transformers import pipeline transcriber = pipeline("automatic-speech-recognition", model="vinai/PhoWhisper-large", device="cuda") #small, medium, large output = transcriber("/mnt/work1/phat/CODE/whisper/data/Hop_tac_qt.mp3") print(output['text']) """
My error: """ (whisper) edward@edward-All-Series:/mnt/work1/phat/CODE/whisper$ python phoWhisper.py Error: unable to open display Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English. This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass
output = transcriber("/mnt/work1/phat/CODE/whisper/data/Hop_tac_qt.mp3")
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/automatic_speech_recognition.py", line 285, in __call__
return super().__call__(inputs, **kwargs)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/base.py", line 1235, in __call__
return next(
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/pt_utils.py", line 124, in __next__
item = next(self.iterator)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/pt_utils.py", line 269, in __next__
processed = self.infer(next(self.iterator), **self.params)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/base.py", line 1150, in forward
model_outputs = self._forward(model_inputs, **forward_params)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/pipelines/automatic_speech_recognition.py", line 508, in _forward
tokens = self.model.generate(
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/models/whisper/generation_whisper.py", line 578, in generate
outputs = super().generate(
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 28, in decorate_context
return func(*args, **kwargs)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/generation/utils.py", line 1758, in generate
result = self._sample(
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/generation/utils.py", line 2410, in _sample
next_token_scores = logits_processor(input_ids, next_token_logits)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/generation/logits_process.py", line 98, in __call__
scores = processor(input_ids, scores)
File "/home/edward/.conda/envs/whisper/lib/python3.9/site-packages/transformers/generation/logits_process.py", line 1787, in __call__
scores_processed = torch.where(suppress_token_mask, -float("inf"), scores)
RuntimeError: expected scalar type double but found float
"""
language='en'. Traceback (most recent call last): File "/mnt/work1/phat/CODE/whisper/phoWhisper.py", line 3, in <module>