Open amerodeh opened 12 months ago
I tried simply adding the new model to the
C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\whisper\__init__.py
file, but it fails with the error below, so more work is probably needed.
Traceback (most recent call last):
File "C:\Users\user\Downloads\tools\whisper-auto-transcribe-0.2.0\cli.py", line 70, in <module>
cli()
File "C:\Users\user\Downloads\tools\whisper-auto-transcribe-0.2.0\cli.py", line 53, in cli
res, used_time = easy_task(
File "C:\Users\user\Downloads\tools\whisper-auto-transcribe-0.2.0\task.py", line 51, in easy_task
output_path, _ = task_start(
File "C:\Users\user\Downloads\tools\whisper-auto-transcribe-0.2.0\task.py", line 80, in task_start
result = model.transcribe(file_path, language=language, task=task, verbose=False)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\whisper\transcribe.py", line 181, in transcribe
result: DecodingResult = decode_with_fallback(segment)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\whisper\transcribe.py", line 117, in decode_with_fallback
decode_result = model.decode(segment, options)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\autograd\grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\whisper\decoding.py", line 705, in decode
result = DecodingTask(model, options).run(mel)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\autograd\grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\whisper\decoding.py", line 621, in run
audio_features: Tensor = self._get_audio_features(mel) # encoder forward pass
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\whisper\decoding.py", line 565, in _get_audio_features
audio_features = self.model.encoder(mel)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\whisper\model.py", line 149, in forward
x = F.gelu(self.conv1(x))
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\conv.py", line 307, in forward
return self._conv_forward(input, self.weight, self.bias)
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\whisper\model.py", line 43, in _conv_forward
return super()._conv_forward(
File "C:\Users\user\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\nn\modules\conv.py", line 303, in _conv_forward
return F.conv1d(input, weight, bias, self.stride,
RuntimeError: Given groups=1, weight of size [1280, 128, 3], expected input[1, 80, 3000] to have 128 channels, but got 80 channels instead
Whisper v3 has been released: https://github.com/openai/whisper/commit/c5d42560760a05584c1c79546a098287e5a771eb (discussion: https://news.ycombinator.com/item?id=38166965)
Are there any plans to upgrade this project? If I'm reading it correctly, the main improvement is in the model file, so probably replacing
C:\Users\%USER%\.cache\whisper\large-v2.pt
with the new one should be enough?