Closed RichardScottOZ closed 3 years ago
STACError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\satstac\thing.py in open(cls, filename)
47 try:
---> 48 dat = cls.open_remote(filename)
49 except STACError as err:
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\satstac\thing.py in open_remote(self, url, headers)
38 else:
---> 39 raise STACError('Unable to open %s' % url)
40 return json.loads(dat)
STACError: Unable to open https://cmr.earthdata.nasa.gov/stac/LPDAAC_ECS/collections?limit=500
During handling of the above exception, another exception occurred:
SSLCertVerificationError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
698 # Make the request on the httplib connection object.
--> 699 httplib_response = self._make_request(
700 conn,
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
381 try:
--> 382 self._validate_conn(conn)
383 except (SocketTimeout, BaseSSLError) as e:
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\urllib3\connectionpool.py in _validate_conn(self, conn)
1009 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
-> 1010 conn.connect()
1011
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\urllib3\connection.py in connect(self)
463 )
--> 464 _match_hostname(cert, self.assert_hostname or server_hostname)
465
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\urllib3\connection.py in _match_hostname(cert, asserted_hostname)
511 try:
--> 512 match_hostname(cert, asserted_hostname)
513 except CertificateError as e:
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\ssl.py in match_hostname(cert, hostname)
415 if len(dnsnames) > 1:
--> 416 raise CertificateError("hostname %r "
417 "doesn't match either of %s"
SSLCertVerificationError: ("hostname 'cmr.earthdata.nasa.gov.s3.amazonaws.com' doesn't match either of '*.s3.amazonaws.com', 's3.amazonaws.com'",)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
438 if not chunked:
--> 439 resp = conn.urlopen(
440 method=request.method,
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
754
--> 755 retries = retries.increment(
756 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
573 if new_retry.is_exhausted():
--> 574 raise MaxRetryError(_pool, url, error or ResponseError(cause))
575
MaxRetryError: HTTPSConnectionPool(host='cmr.earthdata.nasa.gov.s3.amazonaws.com', port=443): Max retries exceeded with url: /stac/LPDAAC_ECS/collections?limit=500 (Caused by SSLError(SSLCertVerificationError("hostname 'cmr.earthdata.nasa.gov.s3.amazonaws.com' doesn't match either of '*.s3.amazonaws.com', 's3.amazonaws.com'")))
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
<ipython-input-11-6dee5ca629e5> in <module>
----> 1 cat2 = intake.open_stac_catalog(f'https://cmr.earthdata.nasa.gov/stac/LPDAAC_ECS/collections?limit={limit}')
2 col_info2 = pd.DataFrame(cat2.metadata['collections'])
3 col_info2.head(1)
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\intake_stac\catalog.py in __init__(self, stac_obj, **kwargs)
59 self._stac_obj = stac_obj
60 elif isinstance(stac_obj, str):
---> 61 self._stac_obj = self._stac_cls.open(stac_obj)
62 else:
63 raise ValueError('Expected %s instance, got: %s' % (self._stac_cls, type(stac_obj)))
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\satstac\thing.py in open(cls, filename)
50 # try signed URL
51 url, headers = get_s3_signed_url(filename)
---> 52 dat = cls.open_remote(url, headers)
53 else:
54 if os.path.exists(filename):
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\satstac\thing.py in open_remote(self, url, headers)
33 def open_remote(self, url, headers={}):
34 """ Open remote file """
---> 35 resp = requests.get(url, headers=headers)
36 if resp.status_code == 200:
37 dat = resp.text
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\requests\api.py in get(url, params, **kwargs)
74
75 kwargs.setdefault('allow_redirects', True)
---> 76 return request('get', url, params=params, **kwargs)
77
78
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\requests\api.py in request(method, url, **kwargs)
59 # cases, and look like a memory leak in others.
60 with sessions.Session() as session:
---> 61 return session.request(method=method, url=url, **kwargs)
62
63
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
540 }
541 send_kwargs.update(settings)
--> 542 resp = self.send(prep, **send_kwargs)
543
544 return resp
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
653
654 # Send the request
--> 655 r = adapter.send(request, **kwargs)
656
657 # Total elapsed time of the request (approximately)
~\AppData\Local\Continuum\anaconda3\envs\stackstac\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
512 if isinstance(e.reason, _SSLError):
513 # This branch is for urllib3 v1.22 and later.
--> 514 raise SSLError(e, request=request)
515
516 raise ConnectionError(e, request=request)
SSLError: HTTPSConnectionPool(host='cmr.earthdata.nasa.gov.s3.amazonaws.com', port=443): Max retries exceeded with url: /stac/LPDAAC_ECS/collections?limit=500 (Caused by SSLError(SSLCertVerificationError("hostname 'cmr.earthdata.nasa.gov.s3.amazonaws.com' doesn't match either of '*.s3.amazonaws.com', 's3.amazonaws.com'")))
@RichardScottOZ Thanks for the report - I thought at first this was due to too many queries being made to CMR (CMR-STAC is a proxy to CMR, and in some cases it makes a lot of queries to CMR for a single STAC request). A limit of 300 works, whereas anything much higher than that will error out. However, the returned error doesn't support that explanation, and I'm not entirely sure what it means.
In the coming weeks we're looking at refactoring the CMR queries to improve performance and the number of calls to CMR. We'll investigate this further when doing that.
OK, thanks. Should I test it again with 200?
I just tried 200 - that worked.
The above worked with 250, anyway.
Thanks to Scott Henderson for the approach.
Some experiments have found that some collections are fine, while others return errors. I was interested in looking at ASTER coverage, so:
https://github.com/RichardScottOZ/Pangeo-Experiments/blob/main/STAC-Catalogue-NASA-LPDAAC-ECS-AST.ipynb