Closed gerdm closed 2 years ago
We can't really do anything about a 503 HTTP error other than changing the URL. I've just checked and https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip seems to be working fine. IIRC, we had some trouble in the past with EMNIST's download server being a little unstable. Our CI tests the URLs on a daily basis, and so far nothing has come up about permanently broken links. Could you retry?
Hi @pmeier,
The problem persists on my end, but apparently it's a problem with my machine. I've re-run the code in a VM and in Colab and it works.
Thanks for confirming.
I had a similar problem; maybe this helps. On my local machine (Ubuntu 21.10) I see that the link it tries to contact is https://cloudstor.aarnet.edu.au/plus/index.php/s/54h3OuGJhFLwAlQ/download and it fails, whereas on a Google Colab machine it connects to https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip and succeeds.
🐛 Describe the bug
It appears that the URL torchvision uses to download the EMNIST dataset is broken.
The traceback at the bottom is the following:
Here's the full traceback
```python Downloading https://cloudstor.aarnet.edu.au/plus/index.php/s/54h3OuGJhFLwAlQ/download to ./EMNIST/raw/download 0it [00:00, ?it/s]Failed download. Trying https -> http instead. Downloading http://cloudstor.aarnet.edu.au/plus/index.php/s/54h3OuGJhFLwAlQ/download to ./EMNIST/raw/download --------------------------------------------------------------------------- HTTPError Traceback (most recent call last) File ~/miniforge3/lib/python3.9/site-packages/torchvision/datasets/utils.py:73, in download_url(url, root, filename, md5) 72 print('Downloading ' + url + ' to ' + fpath) ---> 73 urllib.request.urlretrieve( 74 url, fpath, 75 reporthook=gen_bar_updater() 76 ) 77 except OSError: File ~/miniforge3/lib/python3.9/urllib/request.py:239, in urlretrieve(url, filename, reporthook, data) 237 url_type, path = _splittype(url) --> 239 with contextlib.closing(urlopen(url, data)) as fp: 240 headers = fp.info() File ~/miniforge3/lib/python3.9/urllib/request.py:214, in urlopen(url, data, timeout, cafile, capath, cadefault, context) 213 opener = _opener --> 214 return opener.open(url, data, timeout) File ~/miniforge3/lib/python3.9/urllib/request.py:523, in OpenerDirector.open(self, fullurl, data, timeout) 522 meth = getattr(processor, meth_name) --> 523 response = meth(req, response) 525 return response File ~/miniforge3/lib/python3.9/urllib/request.py:632, in HTTPErrorProcessor.http_response(self, request, response) 631 if not (200 <= code < 300): --> 632 response = self.parent.error( 633 'http', request, response, code, msg, hdrs) 635 return response File ~/miniforge3/lib/python3.9/urllib/request.py:561, in OpenerDirector.error(self, proto, *args) 560 args = (dict, 'default', 'http_error_default') + orig_args --> 561 return self._call_chain(*args) File ~/miniforge3/lib/python3.9/urllib/request.py:494, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args) 493 func = getattr(handler, meth_name) --> 494 result = func(*args) 495 if result is not None: File 
~/miniforge3/lib/python3.9/urllib/request.py:641, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs) 640 def http_error_default(self, req, fp, code, msg, hdrs): --> 641 raise HTTPError(req.full_url, code, msg, hdrs, fp) HTTPError: HTTP Error 503: Service Unavailable During handling of the above exception, another exception occurred: HTTPError Traceback (most recent call last) Input In [2], in
----> 1 dataset = torchvision.datasets.EMNIST(root=".", split="byclass", download=True)
File ~/miniforge3/lib/python3.9/site-packages/torchvision/datasets/mnist.py:259, in EMNIST.__init__(self, root, split, **kwargs)
257 self.training_file = self._training_file(split)
258 self.test_file = self._test_file(split)
--> 259 super(EMNIST, self).__init__(root, **kwargs)
File ~/miniforge3/lib/python3.9/site-packages/torchvision/datasets/mnist.py:68, in MNIST.__init__(self, root, train, transform, target_transform, download)
65 self.train = train # training set or test set
67 if download:
---> 68 self.download()
70 if not self._check_exists():
71 raise RuntimeError('Dataset not found.' +
72 ' You can use download=True to download it')
File ~/miniforge3/lib/python3.9/site-packages/torchvision/datasets/mnist.py:283, in EMNIST.download(self)
281 filename = self.url.rpartition('/')[2]
282 file_path = os.path.join(self.raw_folder, filename)
--> 283 download_url(self.url, root=self.raw_folder, filename=filename, md5=None)
285 print('Extracting zip archive')
286 with zipfile.ZipFile(file_path) as zip_f:
File ~/miniforge3/lib/python3.9/site-packages/torchvision/datasets/utils.py:82, in download_url(url, root, filename, md5)
79 url = url.replace('https:', 'http:')
80 print('Failed download. Trying https -> http instead.'
81 ' Downloading ' + url + ' to ' + fpath)
---> 82 urllib.request.urlretrieve(
83 url, fpath,
84 reporthook=gen_bar_updater()
85 )
File ~/miniforge3/lib/python3.9/urllib/request.py:239, in urlretrieve(url, filename, reporthook, data)
222 """
223 Retrieve a URL into a temporary location on disk.
224
(...)
235 data file as well as the resulting HTTPMessage object.
236 """
237 url_type, path = _splittype(url)
--> 239 with contextlib.closing(urlopen(url, data)) as fp:
240 headers = fp.info()
242 # Just return the local path and the "headers" for file://
243 # URLs. No sense in performing a copy unless requested.
File ~/miniforge3/lib/python3.9/urllib/request.py:214, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
212 else:
213 opener = _opener
--> 214 return opener.open(url, data, timeout)
File ~/miniforge3/lib/python3.9/urllib/request.py:523, in OpenerDirector.open(self, fullurl, data, timeout)
521 for processor in self.process_response.get(protocol, []):
522 meth = getattr(processor, meth_name)
--> 523 response = meth(req, response)
525 return response
File ~/miniforge3/lib/python3.9/urllib/request.py:632, in HTTPErrorProcessor.http_response(self, request, response)
629 # According to RFC 2616, "2xx" code indicates that the client's
630 # request was successfully received, understood, and accepted.
631 if not (200 <= code < 300):
--> 632 response = self.parent.error(
633 'http', request, response, code, msg, hdrs)
635 return response
File ~/miniforge3/lib/python3.9/urllib/request.py:555, in OpenerDirector.error(self, proto, *args)
553 http_err = 0
554 args = (dict, proto, meth_name) + args
--> 555 result = self._call_chain(*args)
556 if result:
557 return result
File ~/miniforge3/lib/python3.9/urllib/request.py:494, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
492 for handler in handlers:
493 func = getattr(handler, meth_name)
--> 494 result = func(*args)
495 if result is not None:
496 return result
File ~/miniforge3/lib/python3.9/urllib/request.py:747, in HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
744 fp.read()
745 fp.close()
--> 747 return self.parent.open(new, timeout=req.timeout)
File ~/miniforge3/lib/python3.9/urllib/request.py:523, in OpenerDirector.open(self, fullurl, data, timeout)
521 for processor in self.process_response.get(protocol, []):
522 meth = getattr(processor, meth_name)
--> 523 response = meth(req, response)
525 return response
File ~/miniforge3/lib/python3.9/urllib/request.py:632, in HTTPErrorProcessor.http_response(self, request, response)
629 # According to RFC 2616, "2xx" code indicates that the client's
630 # request was successfully received, understood, and accepted.
631 if not (200 <= code < 300):
--> 632 response = self.parent.error(
633 'http', request, response, code, msg, hdrs)
635 return response
File ~/miniforge3/lib/python3.9/urllib/request.py:561, in OpenerDirector.error(self, proto, *args)
559 if http_err:
560 args = (dict, 'default', 'http_error_default') + orig_args
--> 561 return self._call_chain(*args)
File ~/miniforge3/lib/python3.9/urllib/request.py:494, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
492 for handler in handlers:
493 func = getattr(handler, meth_name)
--> 494 result = func(*args)
495 if result is not None:
496 return result
File ~/miniforge3/lib/python3.9/urllib/request.py:641, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
640 def http_error_default(self, req, fp, code, msg, hdrs):
--> 641 raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: HTTP Error 503: Service Unavailable
```
Perhaps we need to update the URL?
Versions
cc @pmeier