Open jake1271 opened 1 year ago
What's failing is the loading of the internal faster whisper model. The easiest fix would be to clone the used model manually once from Huggingface and then pass the local filepath as the --model
keyword.
git clone https://huggingface.co/guillaumekln/faster-whisper-large-v2 large-ct2
cd large-ct2 && git lfs pull --include=model.bin
And then use whisperx with --model large-ct2
I also encountered a similar issue when running without internet access. My code and the resulting error are as follows.
The weird thing is that when I turn the internet on, it works successfully. Does `load_model` implicitly download some files?
By the way, `faster_whisper` itself can successfully load my local model without internet access.
model = whisperx.load_model(
"/kaggle/working/whisper-small-ct2",
device,
compute_type=compute_type,
language="bn",
asr_options={"beam_size": 3},
)
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
File /opt/conda/lib/python3.10/urllib/request.py:1348, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1347 try:
-> 1348 h.request(req.get_method(), req.selector, req.data, headers,
1349 encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error
File /opt/conda/lib/python3.10/http/client.py:1283, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1282 """Send a complete request to the server."""
-> 1283 self._send_request(method, url, body, headers, encode_chunked)
File /opt/conda/lib/python3.10/http/client.py:1329, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1328 body = _encode(body, 'body')
-> 1329 self.endheaders(body, encode_chunked=encode_chunked)
File /opt/conda/lib/python3.10/http/client.py:1278, in HTTPConnection.endheaders(self, message_body, encode_chunked)
1277 raise CannotSendHeader()
-> 1278 self._send_output(message_body, encode_chunked=encode_chunked)
File /opt/conda/lib/python3.10/http/client.py:1038, in HTTPConnection._send_output(self, message_body, encode_chunked)
1037 del self._buffer[:]
-> 1038 self.send(msg)
1040 if message_body is not None:
1041
1042 # create a consistent interface to message_body
File /opt/conda/lib/python3.10/http/client.py:976, in HTTPConnection.send(self, data)
975 if self.auto_open:
--> 976 self.connect()
977 else:
File /opt/conda/lib/python3.10/http/client.py:1448, in HTTPSConnection.connect(self)
1446 "Connect to a host on a given (SSL) port."
-> 1448 super().connect()
1450 if self._tunnel_host:
File /opt/conda/lib/python3.10/http/client.py:942, in HTTPConnection.connect(self)
941 sys.audit("http.client.connect", self, self.host, self.port)
--> 942 self.sock = self._create_connection(
943 (self.host,self.port), self.timeout, self.source_address)
944 # Might fail in OSs that don't implement TCP_NODELAY
File /opt/conda/lib/python3.10/socket.py:824, in create_connection(address, timeout, source_address)
823 err = None
--> 824 for res in getaddrinfo(host, port, 0, SOCK_STREAM):
825 af, socktype, proto, canonname, sa = res
File /opt/conda/lib/python3.10/socket.py:955, in getaddrinfo(host, port, family, type, proto, flags)
954 addrlist = []
--> 955 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
956 af, socktype, proto, canonname, sa = res
gaierror: [Errno -3] Temporary failure in name resolution
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
Cell In[19], line 3
1 device = "cuda"
2 compute_type = "float32" #
----> 3 model = whisperx.load_model(
4 "/kaggle/working/whisper-small-ct2",
5 device,
6 compute_type=compute_type,
7 language="bn",
8 asr_options={"beam_size": 3},
9 )
File /opt/conda/lib/python3.10/site-packages/whisperx/asr.py:105, in load_model(whisper_arch, device, device_index, compute_type, asr_options, language, vad_options, model, task, download_root, threads)
102 if vad_options is not None:
103 default_vad_options.update(vad_options)
--> 105 vad_model = load_vad_model(torch.device(device), use_auth_token=None, **default_vad_options)
107 return FasterWhisperPipeline(
108 model=model,
109 vad=vad_model,
(...)
113 suppress_numerals=suppress_numerals,
114 )
File /opt/conda/lib/python3.10/site-packages/whisperx/vad.py:29, in load_vad_model(device, vad_onset, vad_offset, use_auth_token, model_fp)
26 raise RuntimeError(f"{model_fp} exists and is not a regular file")
28 if not os.path.isfile(model_fp):
---> 29 with urllib.request.urlopen(VAD_SEGMENTATION_URL) as source, open(model_fp, "wb") as output:
30 with tqdm(
31 total=int(source.info().get("Content-Length")),
32 ncols=80,
(...)
35 unit_divisor=1024,
36 ) as loop:
37 while True:
File /opt/conda/lib/python3.10/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
214 else:
215 opener = _opener
--> 216 return opener.open(url, data, timeout)
File /opt/conda/lib/python3.10/urllib/request.py:519, in OpenerDirector.open(self, fullurl, data, timeout)
516 req = meth(req)
518 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 519 response = self._open(req, data)
521 # post-process response
522 meth_name = protocol+"_response"
File /opt/conda/lib/python3.10/urllib/request.py:536, in OpenerDirector._open(self, req, data)
533 return result
535 protocol = req.type
--> 536 result = self._call_chain(self.handle_open, protocol, protocol +
537 '_open', req)
538 if result:
539 return result
File /opt/conda/lib/python3.10/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
494 for handler in handlers:
495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
497 if result is not None:
498 return result
File /opt/conda/lib/python3.10/urllib/request.py:1391, in HTTPSHandler.https_open(self, req)
1390 def https_open(self, req):
-> 1391 return self.do_open(http.client.HTTPSConnection, req,
1392 context=self._context, check_hostname=self._check_hostname)
File /opt/conda/lib/python3.10/urllib/request.py:1351, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1348 h.request(req.get_method(), req.selector, req.data, headers,
1349 encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error
-> 1351 raise URLError(err)
1352 r = h.getresponse()
1353 except:
URLError: <urlopen error [Errno -3] Temporary failure in name resolution>
I have developed a custom script based on WhisperX that enables running various pipelines entirely offline. All you need to do is download the pre-trained models and specify their PATHs in the script.
Is there a way to run WhisperX locally with no internet once the specified model has been downloaded? Once in a while I get an error running the inference due to Hugging Face being down (error msg below). So when this happens I have to wait until Hugging Face can respond properly. This wouldn't be an issue but I plan to run WhisperX for prolonged periods of time so this would be an issue.
If there is no option/flag to make it 100% local (after the models are downloaded), could someone point me in the right direction on where to make the code change?
Error msg: Whisperx error due to internet connection: Traceback (most recent call last): File "C:\Users\user1\anaconda3\envs\whisperxxx\lib\site-packages\huggingface_hub\utils_errors.py", line 259, in hf_raise_for_status response.raise_for_status() File "C:\Users\user1\anaconda3\envs\whisperxxx\lib\site-packages\requests\models.py", line 1021, in raise_for_status raise HTTPError(http_error_msg, response=self) requests.exceptions.HTTPError: 504 Server Error: Gateway Time-out for url: https://huggingface.co/api/models/guillaumekln/faster-whisper-medium/revision/main