I verified that the files are present and tried to pass the local MinIO S3 URL to StreamingDataset, but encountered an error:
line 87, in <module>
train_dataset = StreamingDataset(
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/dataset.py", line 473, in __init__
stream_shards = stream.get_shards(self._unique_rank_world, self.allow_unsafe_types)
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/stream.py", line 446, in get_shards
tmp_filename = self._download_file(basename, basename + '.tmp')
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/stream.py", line 310, in _download_file
retry(num_attempts=self.download_retry)(
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/util.py", line 525, in new_func
raise e
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/util.py", line 521, in new_func
return func(*args, **kwargs)
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/stream.py", line 311, in <lambda>
lambda: download_file(remote, local, self.download_timeout))()
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/storage/download.py", line 507, in download_file
download_from_s3(remote, local, timeout)
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/storage/download.py", line 111, in download_from_s3
raise e
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/storage/download.py", line 101, in download_from_s3
_download_file(extra_args=extra_args)
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/streaming/base/storage/download.py", line 73, in _download_file
s3.download_file(obj.netloc,
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/boto3/s3/inject.py", line 190, in download_file
return transfer.download_file(
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/boto3/s3/transfer.py", line 326, in download_file
future.result()
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/s3transfer/futures.py", line 103, in result
return self._coordinator.result()
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/s3transfer/futures.py", line 266, in result
raise self._exception
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/s3transfer/tasks.py", line 269, in _main
self._submit(transfer_future=transfer_future, **kwargs)
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/s3transfer/download.py", line 354, in _submit
response = client.head_object(
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/botocore/client.py", line 530, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/Users/abhijithneilabraham/mambaforge/envs/py/lib/python3.9/site-packages/botocore/client.py", line 964, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: Object s3://dudetest/train_dataset/index.json not found! Either check the bucket path or the bucket permission. If the bucket is a requester pays bucket, then provide the bucket name to the environment variable `MOSAICML_STREAMING_AWS_REQUESTER_PAYS`.
Is this because using a MinIO S3 object with StreamingDataset is not supported?
I have been creating an S3 object using MinIO on localhost like this:
I verified that the files are present and tried to pass the local MinIO S3 URL to StreamingDataset, but encountered an error:
Is this because using a MinIO S3 object with StreamingDataset is not supported?