I am trying to read HDF5 files from S3 using vaex on Windows. I am simply calling vaex.open(s3path, fs_options). Reading from S3 works fine with other packages, e.g. pandas.
I could not find anything online to fix the issue. I am curious whether I am missing anything.
Below are my machine configurations:
Windows 10.
Python version: 3.10.7
vaex: 4.16.0
pyarrow: 11.0.0
Detailed error trace:
OSError Traceback (most recent call last)
Cell In [25], line 4
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\__init__.py:244, in open(path, convert, progress, shuffle, fs_options, fs, *args, **kwargs)
242 ds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs)
243 else:
--> 244 ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs)
245 df = vaex.from_dataset(ds)
246 if df is None:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\dataset.py:80, in open(path, fs_options, fs, *args, **kwargs)
78 for opener in opener_classes:
79 if opener.quick_test(path, fs_options=fs_options, fs=fs):
---> 80 if opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs):
81 return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs)
83 # otherwise try all openers
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\hdf5\dataset.py:167, in Hdf5MemoryMapped.can_open(cls, path, fs_options, fs, group, **kwargs)
165 @classmethod
166 def can_open(cls, path, fs_options={}, fs=None, group=None, **kwargs):
--> 167 with vaex.file.open(path, fs_options=fs_options, fs=fs) as f:
168 signature = f.read(4)
169 if signature != b"\x89\x48\x44\x46":
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\__init__.py:339, in open(path, mode, fs_options, fs, for_arrow, mmap, encoding)
337 else:
338 raise ValueError(f'Only mode=rb/bw/r/w are supported, not {mode}')
--> 339 return FileProxy(create(), path, create)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:71, in FileSystemHandlerCached.open_input_file(self, path)
69 # TODO: we may wait to cache the mmapped file
70 if full_path not in self._file_cache:
---> 71 f = CachedFile(real_open, full_path, read_as_buffer=not self.for_arrow)
72 self._file_cache[full_path] = f
73 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\_fs.pyx:770, in pyarrow._fs.FileSystem.open_input_file()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\error.pxi:144, in pyarrow.lib.pyarrow_internal_check_status()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\error.pxi:115, in pyarrow.lib.check_status()
OSError: When reading information for key 'folderpath/key.hdf5' in bucket 'bucketname': AWS Error NETWORK_CONNECTION during HeadObject operation: Encountered network error when sending http request
I am trying to read HDF5 files from S3 using vaex on Windows. I am simply calling vaex.open(s3path, fs_options). Reading from S3 works fine with other packages, e.g. pandas.
I could not find anything online to fix the issue. I am curious whether I am missing anything.
Below are my machine configurations: Windows 10. Python version: 3.10.7 vaex: 4.16.0 pyarrow: 11.0.0
Detailed error trace: OSError Traceback (most recent call last) Cell In [25], line 4 File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\__init__.py:244, in open(path, convert, progress, shuffle, fs_options, fs, *args, **kwargs) 242 ds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs) 243 else: --> 244 ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs) 245 df = vaex.from_dataset(ds) 246 if df is None:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\dataset.py:80, in open(path, fs_options, fs, *args, **kwargs) 78 for opener in opener_classes: 79 if opener.quick_test(path, fs_options=fs_options, fs=fs): ---> 80 if opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs): 81 return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs) 83 # otherwise try all openers
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\hdf5\dataset.py:167, in Hdf5MemoryMapped.can_open(cls, path, fs_options, fs, group, **kwargs) 165 @classmethod 166 def can_open(cls, path, fs_options={}, fs=None, group=None, **kwargs): --> 167 with vaex.file.open(path, fs_options=fs_options, fs=fs) as f: 168 signature = f.read(4) 169 if signature != b"\x89\x48\x44\x46":
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\__init__.py:339, in open(path, mode, fs_options, fs, for_arrow, mmap, encoding) 337 else: 338 raise ValueError(f'Only mode=rb/bw/r/w are supported, not {mode}') --> 339 return FileProxy(create(), path, create)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\__init__.py:323, in open.<locals>.create()
322 def create():
--> 323 return fs.open_input_file(path)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:71, in FileSystemHandlerCached.open_input_file(self, path) 69 # TODO: we may wait to cache the mmapped file 70 if full_path not in self._file_cache: ---> 71 f = CachedFile(real_open, full_path, read_as_buffer=not self.for_arrow) 72 self._file_cache[full_path] = f 73 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:187, in CachedFile.__init__(self, file, path, cache_dir, block_size, data_file, mask_file, read_as_buffer) 185 self.length = f.tell() 186 else: --> 187 self._use_file() 188 self.file.seek(0, 2) 189 self.length = self.file.tell()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:238, in CachedFile._use_file(self) 236 def _use_file(self): 237 if callable(self.file): --> 238 self.file = self.file()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\vaex\file\cache.py:67, in FileSystemHandlerCached.open_input_file.<locals>.real_open()
66 def real_open():
---> 67 return self.fs.open_input_file(path)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\_fs.pyx:770, in pyarrow._fs.FileSystem.open_input_file()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\error.pxi:144, in pyarrow.lib.pyarrow_internal_check_status()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pyarrow\error.pxi:115, in pyarrow.lib.check_status()
OSError: When reading information for key 'folderpath/key.hdf5' in bucket 'bucketname': AWS Error NETWORK_CONNECTION during HeadObject operation: Encountered network error when sending http request