neurohackademy / nh2020-jupyterhub

hub.neurohackademy.org: Deployment config, docker image, documentation.
16 stars 27 forks source link

Getting data with s3fs from public buckets fails #113

Open arokem opened 4 years ago

arokem commented 4 years ago

Even though credentials are, in principle, not required to read data from public buckets, the following call:

fs.ls('/openneuro.org/')

Throws:

---------------------------------------------------------------------------
NoCredentialsError                        Traceback (most recent call last)
<ipython-input-3-f77059afe139> in <module>
----> 1 ll = fs.ls('/openneuro.org/')

/srv/conda/envs/notebook/lib/python3.7/site-packages/s3fs/core.py in ls(self, path, detail, refresh, **kwargs)
    617         """
    618         path = self._strip_protocol(path).rstrip('/')
--> 619         files = self._ls(path, refresh=refresh)
    620         if not files:
    621             files = self._ls(self._parent(path), refresh=refresh)

/srv/conda/envs/notebook/lib/python3.7/site-packages/s3fs/core.py in _ls(self, path, refresh)
    485             return self._lsbuckets(refresh)
    486         else:
--> 487             return self._lsdir(path, refresh)
    488 
    489     def exists(self, path):

/srv/conda/envs/notebook/lib/python3.7/site-packages/s3fs/core.py in _lsdir(self, path, refresh, max_items)
    392                 files = []
    393                 dircache = []
--> 394                 for i in it:
    395                     dircache.extend(i.get('CommonPrefixes', []))
    396                     for c in i.get('Contents', []):

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/paginate.py in __iter__(self)
    253         self._inject_starting_params(current_kwargs)
    254         while True:
--> 255             response = self._make_request(current_kwargs)
    256             parsed = self._extract_parsed_response(response)
    257             if first_request:

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/paginate.py in _make_request(self, current_kwargs)
    330 
    331     def _make_request(self, current_kwargs):
--> 332         return self._method(**current_kwargs)
    333 
    334     def _extract_parsed_response(self, response):

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
    314                     "%s() only accepts keyword arguments." % py_operation_name)
    315             # The "self" in this scope is referring to the BaseClient.
--> 316             return self._make_api_call(operation_name, kwargs)
    317 
    318         _api_call.__name__ = str(py_operation_name)

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
    620         else:
    621             http, parsed_response = self._make_request(
--> 622                 operation_model, request_dict, request_context)
    623 
    624         self.meta.events.emit(

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/client.py in _make_request(self, operation_model, request_dict, request_context)
    639     def _make_request(self, operation_model, request_dict, request_context):
    640         try:
--> 641             return self._endpoint.make_request(operation_model, request_dict)
    642         except Exception as e:
    643             self.meta.events.emit(

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/endpoint.py in make_request(self, operation_model, request_dict)
    100         logger.debug("Making request for %s with params: %s",
    101                      operation_model, request_dict)
--> 102         return self._send_request(request_dict, operation_model)
    103 
    104     def create_request(self, params, operation_model=None):

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/endpoint.py in _send_request(self, request_dict, operation_model)
    130     def _send_request(self, request_dict, operation_model):
    131         attempts = 1
--> 132         request = self.create_request(request_dict, operation_model)
    133         context = request_dict['context']
    134         success_response, exception = self._get_response(

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/endpoint.py in create_request(self, params, operation_model)
    114                 op_name=operation_model.name)
    115             self._event_emitter.emit(event_name, request=request,
--> 116                                      operation_name=operation_model.name)
    117         prepared_request = self.prepare_request(request)
    118         return prepared_request

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
    354     def emit(self, event_name, **kwargs):
    355         aliased_event_name = self._alias_event_name(event_name)
--> 356         return self._emitter.emit(aliased_event_name, **kwargs)
    357 
    358     def emit_until_response(self, event_name, **kwargs):

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
    226                  handlers.
    227         """
--> 228         return self._emit(event_name, kwargs)
    229 
    230     def emit_until_response(self, event_name, **kwargs):

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/hooks.py in _emit(self, event_name, kwargs, stop_on_response)
    209         for handler in handlers_to_call:
    210             logger.debug('Event %s: calling handler %s', event_name, handler)
--> 211             response = handler(**kwargs)
    212             responses.append((handler, response))
    213             if stop_on_response and response is not None:

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/signers.py in handler(self, operation_name, request, **kwargs)
     88         # this method is invoked to sign the request.
     89         # Don't call this method directly.
---> 90         return self.sign(operation_name, request)
     91 
     92     def sign(self, operation_name, request, region_name=None,

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/signers.py in sign(self, operation_name, request, region_name, signing_type, expires_in, signing_name)
    158                     raise e
    159 
--> 160             auth.add_auth(request)
    161 
    162     def _choose_signer(self, operation_name, signing_type, context):

/srv/conda/envs/notebook/lib/python3.7/site-packages/botocore/auth.py in add_auth(self, request)
    355     def add_auth(self, request):
    356         if self.credentials is None:
--> 357             raise NoCredentialsError
    358         datetime_now = datetime.datetime.utcnow()
    359         request.context['timestamp'] = datetime_now.strftime(SIGV4_TIMESTAMP)

NoCredentialsError: Unable to locate credentials

It might be enough to have some dummy credentials installed in ~/.aws/credentials?

arokem commented 4 years ago

Yep - running `mkdir -p ~/.aws && touch ~/.aws/credentials` resolves this issue. I'll keep this open for now, so we can consider baking this into the image.

consideRatio commented 4 years ago

I figured I'd report this upstream, but I was unable to work around it by adding a blank credentials file. I did find, though, that initializing S3FileSystem() with the boolean argument anon=True makes the issue go away.

Related

Reproduced error

import s3fs
fs = s3fs.S3FileSystem()
fs.ls('/openneuro.org/')

Workaround that didn't work

mkdir -p ~/.aws && touch ~/.aws/credentials

Solution

import s3fs
fs = s3fs.S3FileSystem(anon=True)
fs.ls('/openneuro.org/')