Open aanilpala opened 3 years ago
Hi,
Do you have a longer stack trace? Also, did you try changing the limit on the file descriptors? For example:
ulimit -n 20000
(for jupyter this should be done before the notebook server is started).
cheers,
Maarten
Hi, here is the full stack trace. I also tried with ulimit -n 200000. Same error.
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/Users/ahmet-anil.pala/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/process.py", line 239, in _process_worker
r = call_item.fn(*call_item.args, **call_item.kwargs)
File "/Users/ahmet-anil.pala/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/process.py", line 198, in _process_chunk
return [fn(*args) for args in chunk]
File "/Users/ahmet-anil.pala/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/process.py", line 198, in <listcomp>
return [fn(*args) for args in chunk]
File "<ipython-input-3-4321503d0e24>", line 3, in worker
vaex_df.export_hdf5(f"./removeme{ind}.hdf5")
File "/Users/ahmet-anil.pala/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/vaex/dataframe.py", line 6209, in export_hdf5
writer.write(self, chunk_size=chunk_size, progress=progress, column_count=column_count)
File "/Users/ahmet-anil.pala/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/vaex/hdf5/writer.py", line 54, in write
df.execute()
File "/Users/ahmet-anil.pala/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/vaex/dataframe.py", line 308, in execute
just_run(self.execute_async())
File "/Users/ahmet-anil.pala/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/vaex/asyncio.py", line 35, in just_run
return loop.run_until_complete(coro)
File "/Users/ahmet-anil.pala/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/nest_asyncio.py", line 64, in run_until_complete
self._run_once()
File "/Users/ahmet-anil.pala/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/nest_asyncio.py", line 87, in _run_once
event_list = self._selector.select(timeout)
File "/Users/ahmet-anil.pala/.pyenv/versions/3.7.9/lib/python3.7/selectors.py", line 558, in select
kev_list = self._selector.control(None, max_ev, timeout)
OSError: [Errno 9] Bad file descriptor
"""
The above exception was the direct cause of the following exception:
OSError Traceback (most recent call last)
<ipython-input-5-14cfd923b1e8> in <module>
----> 1 res_list = list(pool.map(worker, range(10)))
~/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/process.py in _chain_from_iterable_of_lists(iterable)
481 careful not to keep references to yielded objects.
482 """
--> 483 for element in iterable:
484 element.reverse()
485 while element:
~/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/_base.py in result_iterator()
596 # Careful not to keep a reference to the popped future
597 if timeout is None:
--> 598 yield fs.pop().result()
599 else:
600 yield fs.pop().result(end_time - time.monotonic())
~/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
~/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
~/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/process.py in _process_worker()
237 return
238 try:
--> 239 r = call_item.fn(*call_item.args, **call_item.kwargs)
240 except BaseException as e:
241 exc = _ExceptionWithTraceback(e, e.__traceback__)
~/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/process.py in _process_chunk()
196
197 """
--> 198 return [fn(*args) for args in chunk]
199
200
~/.pyenv/versions/3.7.9/lib/python3.7/concurrent/futures/process.py in <listcomp>()
196
197 """
--> 198 return [fn(*args) for args in chunk]
199
200
<ipython-input-3-4321503d0e24> in worker()
1 def worker(ind):
2 vaex_df = vaex.example()
----> 3 vaex_df.export_hdf5(f"./removeme{ind}.hdf5")
4 return True
~/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/vaex/dataframe.py in export_hdf5()
6207 from vaex.hdf5.writer import Writer
6208 with Writer(path) as writer:
-> 6209 writer.write(self, chunk_size=chunk_size, progress=progress, column_count=column_count)
6210
6211 @docsubst
~/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/vaex/hdf5/writer.py in write()
52 str_byte_length = {name:df[name].str.byte_length().sum(delay=True) for name, dtype in dtypes.items() if dtype.is_string}
53 str_count = {name:df.count(df[name], delay=True) for name, dtype in dtypes.items() if dtype.is_string}
---> 54 df.execute()
55
56 str_byte_length = {k: v.get() for k, v in str_byte_length.items()}
~/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/vaex/dataframe.py in execute()
306 '''Execute all delayed jobs.'''
307 from .asyncio import just_run
--> 308 just_run(self.execute_async())
309
310 async def execute_async(self):
~/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/vaex/asyncio.py in just_run()
33 nest_asyncio.apply()
34 check_patch_tornado()
---> 35 return loop.run_until_complete(coro)
36 finally:
37 if not had_loop: # remove loop if we did not have one
~/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/nest_asyncio.py in run_until_complete()
62 f._log_destroy_pending = False
63 while not f.done():
---> 64 self._run_once()
65 if self._stopping:
66 break
~/klarna_repos/fraud-model-utils/.venv/lib/python3.7/site-packages/nest_asyncio.py in _run_once()
85 else min(max(scheduled[0]._when - now, 0), 86400) if scheduled
86 else None)
---> 87 event_list = self._selector.select(timeout)
88 self._process_events(event_list)
89
~/.pyenv/versions/3.7.9/lib/python3.7/selectors.py in select()
556 ready = []
557 try:
--> 558 kev_list = self._selector.control(None, max_ev, timeout)
559 except InterruptedError:
560 return ready
OSError: [Errno 9] Bad file descriptor
Description: The problem mentioned in the title seems to be platform-dependent. On my local setup, I am able to reproduce it with the following code:
This gives me the following error:
OSError: [Errno 9] Bad file descriptor
Software information
Additional information: The same code works in a Python shell or when executed with python from the command line.