vaexio / vaex

Out-of-Core hybrid Apache Arrow/NumPy DataFrame for Python, ML, visualization and exploration of big tabular data at a billion rows per second 🚀
https://vaex.io
MIT License
8.23k stars 590 forks source link

AWS Error NETWORK_CONNECTION during HeadObject operation: Encountered network error when sending http request #2340

Open srivathshanks opened 1 year ago

srivathshanks commented 1 year ago

I am trying to read HDF5 files from S3 using vaex on Windows. I am using a simple vaex.open(s3path, fs_options) call. Reading from S3 works fine with other packages, e.g. pandas.

I could not find anything online to fix the issue. I am curious whether there is anything I am missing.

Below is my machine configuration: Windows 10; Python version: 3.10.7; vaex: 4.16.0; pyarrow: 11.0.0.

Detailed error trace: OSError Traceback (most recent call last) Cell In [25], line 4 File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\__init__.py:244, in open(path, convert, progress, shuffle, fs_options, fs, *args, **kwargs) 242 ds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs) 243 else: --> 244 ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs) 245 df = vaex.from_dataset(ds) 246 if df is None:

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\dataset.py:80, in open(path, fs_options, fs, *args, **kwargs) 78 for opener in opener_classes: 79 if opener.quick_test(path, fs_options=fs_options, fs=fs): ---> 80 if opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs): 81 return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs) 83 # otherwise try all openers

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\hdf5\dataset.py:167, in Hdf5MemoryMapped.can_open(cls, path, fs_options, fs, group, **kwargs) 165 @classmethod 166 def can_open(cls, path, fs_options={}, fs=None, group=None, **kwargs): --> 167 with vaex.file.open(path, fs_options=fs_options, fs=fs) as f: 168 signature = f.read(4) 169 if signature != b"\x89\x48\x44\x46":

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\__init__.py:339, in open(path, mode, fs_options, fs, for_arrow, mmap, encoding) 337 else: 338 raise ValueError(f'Only mode=rb/bw/r/w are supported, not {mode}') --> 339 return FileProxy(create(), path, create)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\__init__.py:323, in open.<locals>.create() 322 def create(): --> 323 return fs.open_input_file(path)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:71, in FileSystemHandlerCached.open_input_file(self, path) 69 # TODO: we may wait to cache the mmapped file 70 if full_path not in self._file_cache: ---> 71 f = CachedFile(real_open, full_path, read_as_buffer=not self.for_arrow) 72 self._file_cache[full_path] = f 73 else:

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:187, in CachedFile.__init__(self, file, path, cache_dir, block_size, data_file, mask_file, read_as_buffer) 185 self.length = f.tell() 186 else: --> 187 self._use_file() 188 self.file.seek(0, 2) 189 self.length = self.file.tell()

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:238, in CachedFile._use_file(self) 236 def _use_file(self): 237 if callable(self.file): --> 238 self.file = self.file()

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:67, in FileSystemHandlerCached.open_input_file.<locals>.real_open() 66 def real_open(): ---> 67 return self.fs.open_input_file(path)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\_fs.pyx:770, in pyarrow._fs.FileSystem.open_input_file()

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\error.pxi:144, in pyarrow.lib.pyarrow_internal_check_status()

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\error.pxi:115, in pyarrow.lib.check_status()

OSError: When reading information for key 'folderpath/key.hdf5' in bucket 'bucketname': AWS Error NETWORK_CONNECTION during HeadObject operation: Encountered network error when sending http request