michaelaye / nbplanetary

A nbdev-based approach to create a core library for planetarypy
https://michaelaye.github.io/nbplanetary/
MIT License
7 stars 1 forks source link

IncompleteRead error when downloading EDRCUMINDEX.TAB #64

Closed toihr closed 3 months ago

toihr commented 3 months ago

I have been getting an IncompleteRead error while downloading the EDRCUMINDEX.TAB file:

{
    "name": "ChunkedEncodingError",
    "message": "('Connection broken: IncompleteRead(253099718 bytes read, 2018417980 more expected)', IncompleteRead(253099718 bytes read, 2018417980 more expected))",
    "stack": "---------------------------------------------------------------------------
IncompleteRead                            Traceback (most recent call last)
File ~/.miniforge3/envs/planetary/lib/python3.9/site-packages/urllib3/response.py:737, in HTTPResponse._error_catcher(self)
    736 try:
--> 737     yield
    739 except SocketTimeout as e:
    740     # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
    741     # there is yet no clean way to get at it from this context.

File ~/.miniforge3/envs/planetary/lib/python3.9/site-packages/urllib3/response.py:883, in HTTPResponse._raw_read(self, amt, read1)
    873     if (
    874         self.enforce_content_length
    875         and self.length_remaining is not None
   (...)
    881         # raised during streaming, so all calls with incorrect
    882         # Content-Length are caught.
--> 883         raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
    884 elif read1 and (
    885     (amt != 0 and not data) or self.length_remaining == len(data)
    886 ):
   (...)
    889     # `http.client.HTTPResponse`, so we close it here.
    890     # See https://github.com/python/cpython/issues/113199

IncompleteRead: IncompleteRead(253099718 bytes read, 2018417980 more expected)

The above exception was the direct cause of the following exception:

ProtocolError                             Traceback (most recent call last)
File ~/.miniforge3/envs/planetary/lib/python3.9/site-packages/requests/models.py:816, in Response.iter_content.<locals>.generate()
    815 try:
--> 816     yield from self.raw.stream(chunk_size, decode_content=True)
    817 except ProtocolError as e:

File ~/.miniforge3/envs/planetary/lib/python3.9/site-packages/urllib3/response.py:1043, in HTTPResponse.stream(self, amt, decode_content)
   1042 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
-> 1043     data = self.read(amt=amt, decode_content=decode_content)
   1045     if data:

File ~/.miniforge3/envs/planetary/lib/python3.9/site-packages/urllib3/response.py:963, in HTTPResponse.read(self, amt, decode_content, cache_content)
    959 while len(self._decoded_buffer) < amt and data:
    960     # TODO make sure to initially read enough data to get past the headers
    961     # For example, the GZ file header takes 10 bytes, we don't want to read
    962     # it one byte at a time
--> 963     data = self._raw_read(amt)
    964     decoded_data = self._decode(data, decode_content, flush_decoder)

File ~/.miniforge3/envs/planetary/lib/python3.9/site-packages/urllib3/response.py:891, in HTTPResponse._raw_read(self, amt, read1)
    884     elif read1 and (
    885         (amt != 0 and not data) or self.length_remaining == len(data)
    886     ):
   (...)
    889         # `http.client.HTTPResponse`, so we close it here.
    890         # See https://github.com/python/cpython/issues/113199
--> 891         self._fp.close()
    893 if data:

File ~/.miniforge3/envs/planetary/lib/python3.9/contextlib.py:137, in _GeneratorContextManager.__exit__(self, typ, value, traceback)
    136 try:
--> 137     self.gen.throw(typ, value, traceback)
    138 except StopIteration as exc:
    139     # Suppress StopIteration *unless* it's the same exception that
    140     # was passed to throw().  This prevents a StopIteration
    141     # raised inside the \"with\" statement from being suppressed.

File ~/.miniforge3/envs/planetary/lib/python3.9/site-packages/urllib3/response.py:761, in HTTPResponse._error_catcher(self)
    760         arg = f\"Connection broken: {e!r}\"
--> 761     raise ProtocolError(arg, e) from e
    763 except (HTTPException, OSError) as e:

ProtocolError: ('Connection broken: IncompleteRead(253099718 bytes read, 2018417980 more expected)', IncompleteRead(253099718 bytes read, 2018417980 more expected))

During handling of the above exception, another exception occurred:

ChunkedEncodingError                      Traceback (most recent call last)
Cell In[11], line 1
----> 1 rm.launch_catalog_production(3)

File ~/Dropbox/masters_tom_ihro/Programming/Packages/p4tools/p4tools/production/catalog.py:694, in ReleaseManager.launch_catalog_production(self, n_workers)
    692 # calculate all metadata required for P4 analysis
    693 LOGGER.info(\"Writing summary metadata file.\")
--> 694 self.calc_metadata()
    695 # merging metadata
    696 self.merge_all()

File ~/Dropbox/masters_tom_ihro/Programming/Packages/p4tools/p4tools/production/catalog.py:440, in ReleaseManager.calc_metadata(self)
    438 if not self.EDRINDEX_meta_path.exists():
    439     NAs = p4meta.get_north_azimuths_from_SPICE(self.obsids)
--> 440     edrindex = get_index(\"mro.hirise\", \"edr\")
    441     p4_edr = (
    442         edrindex[edrindex.OBSERVATION_ID.isin(self.obsids)]
    443         .query('CCD_NAME==\"RED4\"')
    444         .drop_duplicates(subset=\"OBSERVATION_ID\")
    445     )
    446     p4_edr = p4_edr.set_index(\"OBSERVATION_ID\").join(
    447         NAs.set_index(\"OBSERVATION_ID\")
    448     )

File ~/Dropbox/masters_tom_ihro/Programming/Packages/nbplanetary/planetarypy/pds/apps.py:37, in get_index(instr, index_name, refresh, force)
     35     index.download()
     36 elif refresh and index.update_available:
---> 37     index.download()
     38     print(\"An updated index is available. Downloading...\")
     39 if not index.local_parq_path.exists():

File ~/Dropbox/masters_tom_ihro/Programming/Packages/nbplanetary/planetarypy/pds/indexes.py:208, in download(self)
    206 utils.url_retrieve(label_url, self.local_label_path)
    207 logger.info(\"Downloading %s.\", self.table_url)
--> 208 utils.url_retrieve(self.table_url, self.local_table_path)
    209 print(f\"Downloaded {self.local_label_path} and {self.local_table_path}\")
    210 if (
    211     self.key == \"missions.mro.hirise.indexes.edr\"
    212 ):  # HiRISE EDR index is broken on the PDS. Team knows.

File ~/Dropbox/masters_tom_ihro/Programming/Packages/nbplanetary/planetarypy/utils.py:212, in url_retrieve(url, outfile, chunk_size, user, passwd)
    204     raise ConnectionError(f\"Could not download {url}\
Error code: {R.status_code}\")
    205 with tqdm.wrapattr(
    206     open(outfile, \"wb\"),
    207     \"write\",
   (...)
    210     desc=str(Path(outfile).name),
    211 ) as fd:
--> 212     for chunk in R.iter_content(chunk_size=chunk_size):
    213         fd.write(chunk)

File ~/.miniforge3/envs/planetary/lib/python3.9/site-packages/requests/models.py:818, in Response.iter_content.<locals>.generate()
    816     yield from self.raw.stream(chunk_size, decode_content=True)
    817 except ProtocolError as e:
--> 818     raise ChunkedEncodingError(e)
    819 except DecodeError as e:
    820     raise ContentDecodingError(e)

ChunkedEncodingError: ('Connection broken: IncompleteRead(253099718 bytes read, 2018417980 more expected)', IncompleteRead(253099718 bytes read, 2018417980 more expected))"
}
michaelaye commented 3 months ago

I can't see the precise command that you used. Could you post it?

toihr commented 3 months ago

I think I have fixed this. There seems to have been a problem during the initial download that broke something. I deleted the files, and it worked again after forcing a re-download with `get_index("mro.hirise", "edr")`.