To create a public link, set share=True in launch().
No language specified, language will be first be detected for each audio file (increases inference time).
Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.4.0. To apply the upgrade to your files permanently, run python -m pytorch_lightning.utilities.upgrade_checkpoint C:\Users\lenovo\.cache\torch\whisperx-vad-segmentation.bin
Model was trained with pyannote.audio 0.0.1, yours is 3.1.1. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.3.1+cu121. Bad things might happen unless you revert torch to 1.x.
Loaded Whisper model
Traceback (most recent call last):
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\gradio\queueing.py", line 536, in process_events
response = await route_utils.call_process_api(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\gradio\route_utils.py", line 288, in call_process_api
output = await app.get_blocks().process_api(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\gradio\blocks.py", line 1931, in process_api
result = await self.call_function(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\gradio\blocks.py", line 1516, in call_function
prediction = await anyio.to_thread.run_sync( # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\anyio\to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\anyio_backends_asyncio.py", line 2177, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\anyio_backends_asyncio.py", line 859, in run
result = context.run(func, args)
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\gradio\utils.py", line 826, in wrapper
response = f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\gradio\utils.py", line 826, in wrapper
response = f(*args, **kwargs)
^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\webui.py", line 405, in transcribe_other_language_proxy
process_audio_files(base_directory=dataset_dir,
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\modules\tortoise_dataset_tools\dataset_whisper_tools\dataset_maker_large_files.py", line 230, in process_audio_files
run_whisperx(new_audio_path, srt_output_dir, language, int(chunk_size), no_align, whisper_model)
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\modules\tortoise_dataset_tools\dataset_whisper_tools\dataset_maker_large_files.py", line 83, in run_whisperx
result = whisper_model.transcribe(audio=audio,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\whisperx\asr.py", line 218, in transcribe
for idx, out in enumerate(self.__call__(data(audio, vad_segments), batch_size=batch_size, num_workers=num_workers)):
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\transformers\pipelines\pt_utils.py", line 124, in __next__
item = next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\transformers\pipelines\pt_utils.py", line 125, in next
processed = self.infer(item, self.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\transformers\pipelines\base.py", line 1112, in forward
model_outputs = self._forward(model_inputs, **forward_params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\whisperx\asr.py", line 152, in _forward
outputs = self.model.generate_segment_batched(model_inputs['inputs'], self.tokenizer, self.options)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\lenovo\Desktop\StyleTTS\StyleTTS-WebUI\venv\Lib\site-packages\whisperx\asr.py", line 53, in generate_segment_batched
result = self.model.generate(
^^^^^^^^^^^^^^^^^^^^
RuntimeError: CUDA failed with error out of memory
I think it's due to my GPU. Below are my specs:
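If the GPU is simply running out of VRAM, the usual WhisperX mitigations are loading a smaller model, lowering the batch size, or switching to int8 inference. A minimal sketch using the standalone whisperx API, assuming you can reproduce the transcription outside the WebUI (the model name, audio path, and batch_size here are placeholder values, not the ones StyleTTS-WebUI uses internally):

```python
import whisperx

device = "cuda"

# A smaller checkpoint with int8 weights cuts VRAM use considerably;
# "medium" is illustrative -- pick the largest model that fits your GPU.
model = whisperx.load_model("medium", device, compute_type="int8")

audio = whisperx.load_audio("sample.wav")  # placeholder path

# A smaller batch_size trades throughput for lower peak memory.
result = model.transcribe(audio, batch_size=4)
print(result["segments"])
```

If that still hits an out-of-memory error, dropping to batch_size=1, or falling back to device="cpu", should at least confirm whether VRAM is the bottleneck.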