Closed vetlewi closed 1 year ago
The following block of code will recreate the fault independent of OMpy
from pathos.multiprocessing import ProcessPool
from typing import Optional, Tuple, Iterator, Any, Union
import inspect
import re
import tqdm
def self_if_none(instance: Any, variable: Any, nonable: bool = False) -> Any:
""" Sets `variable` from instance if variable is None.
Note: Has to be imported as in the normalizer class due to class stack
name retrieval
Args:
instance: instance
variable: The variable to check
nonable: Does not raise ValueError if
variable and self.<variable_name> is None
(where <variable_name> is replace by the variable's name).
Returns:
The value of variable or instance.variable
Raises:
ValueError: Nonable is True and if both variable and
self.variable are None
"""
def _retrieve_name(var: Any) -> str:
""" Finds the source-code name of `var`
NOTE: Only call from self.reset.
Args:
var: The variable to retrieve the name of.
Returns:
The variable's name.
"""
# Retrieve the line of the source code of the third frame.
# The 0th frame is the current function, the 1st frame is the
# calling function and the second is the calling function's caller.
line = inspect.stack()[3].code_context[0].strip()
match = re.search(r".*\((\w+).*\).*", line)
assert match is not None, "Retrieving of name failed"
name = match.group(1)
return name
name = _retrieve_name(variable)
if variable is None:
self_variable = getattr(instance, name)
if not nonable and self_variable is None:
raise ValueError(f"`{name}` must be set")
return self_variable
return variable
class test_class:
def __init__(self, im_a_object = None):
self.my_object = im_a_object
def step(self, i, my_object=None):
# I will copy the n object over self.my_object it it isnt None.
self.my_object = self.self_if_none(my_object)
return i + n
def self_if_none(self, *args, **kwargs):
""" wrapper for lib.self_if_none """
return self_if_none(self, *args, **kwargs)
ns = [1, 1, 1, 1, 1]
cls_suff = test_class(3)
res = []
for i, n in enumerate(ns):
res.append(cls_suff.step(i, ns))
print(res)
pool = ProcessPool(nodes=2)
iterator = pool.imap(cls_suff.step, range(len(ns)), ns)
res = list(tqdm.tqdm_notebook(iterator, total=len(ns)))
pool.close()
pool.join()
pool.clear()
gives output:
[1, 2, 3, 4, 5]
/var/folders/cb/ly7f8x1n48q9lkmqt6c6lyv00000gn/T/ipykernel_25714/2921792000.py:78: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
res = list(tqdm.tqdm_notebook(iterator, total=len(ns)))
0%
0/5 [00:00<?, ?it/s]
---------------------------------------------------------------------------
RemoteTraceback Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
File "/Users/vetlewi/.pyenv/versions/3.10.1/lib/python3.10/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/Users/vetlewi/.pyenv/versions/3.10.1/lib/python3.10/site-packages/pathos/helpers/mp_helper.py", line -1, in <lambda>
File "/var/folders/cb/ly7f8x1n48q9lkmqt6c6lyv00000gn/T/ipykernel_25714/2921792000.py", line 62, in step
return i + n
File "/var/folders/cb/ly7f8x1n48q9lkmqt6c6lyv00000gn/T/ipykernel_25714/2921792000.py", line 146, in self_if_none
File "/var/folders/cb/ly7f8x1n48q9lkmqt6c6lyv00000gn/T/ipykernel_25714/2921792000.py", line 45, in self_if_none
name = _retrieve_name(variable)
File "/var/folders/cb/ly7f8x1n48q9lkmqt6c6lyv00000gn/T/ipykernel_25714/2921792000.py", line 39, in _retrieve_name
line = inspect.stack()[3].code_context[0].strip()
File "/Users/vetlewi/.pyenv/versions/3.10.1/lib/python3.10/inspect.py", line 1678, in stack
return getouterframes(sys._getframe(1), context)
File "/Users/vetlewi/.pyenv/versions/3.10.1/lib/python3.10/inspect.py", line 1655, in getouterframes
frameinfo = (frame,) + getframeinfo(frame, context)
File "/Users/vetlewi/.pyenv/versions/3.10.1/lib/python3.10/inspect.py", line 1627, in getframeinfo
start = lineno - 1 - context//2
TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
"""
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last)
/var/folders/cb/ly7f8x1n48q9lkmqt6c6lyv00000gn/T/ipykernel_25714/2921792000.py in <module>
76 pool = ProcessPool(nodes=2)
77 iterator = pool.imap(cls_suff.step, range(len(ns)), ns)
---> 78 res = list(tqdm.tqdm_notebook(iterator, total=len(ns)))
79 pool.close()
80 pool.join()
~/.pyenv/versions/3.10.1/lib/python3.10/site-packages/tqdm/notebook.py in __iter__(self)
255 def __iter__(self):
256 try:
--> 257 for obj in super(tqdm_notebook, self).__iter__():
258 # return super(tqdm...) will not catch exception
259 yield obj
~/.pyenv/versions/3.10.1/lib/python3.10/site-packages/tqdm/std.py in __iter__(self)
1178
1179 try:
-> 1180 for obj in iterable:
1181 yield obj
1182 # Update and possibly print the progressbar.
~/.pyenv/versions/3.10.1/lib/python3.10/site-packages/multiprocess/pool.py in next(self, timeout)
868 if success:
869 return value
--> 870 raise value
871
872 __next__ = next # XXX
TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
This may be a bug in pathos. Will try to recreate with the default python processing pool class.
I have encountered a similar bug before. OMpy does not play well with multiprocessing, but I have not found the root cause, but Pathos seems to crash too often to be worth the effort.
What is worrying is that I am unable to replicate the bug using the code you provided. The function self_if_none
is very brittle as it walks up Python's stack frame to find the variable and its value, hence it will fail if the stack does not look as when I wrote it.
For future use we should remove self_if_none
and find a more robust solution.
This is the simplest I've been able to get it:
from pathos.multiprocessing import ProcessPool
import inspect
def do_something_with_inspect(count):
count *= 2
return inspect.stack()
pool = ProcessPool(nodes=2)
iterator = pool.imap(do_something_with_inspect, range(4))
for r in iterator:
print(r)
pool.close()
pool.join()
pool.clear()
results:
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/Users/vetlewi/.pyenv/versions/3.10.1/envs/pathosbug/lib/python3.10/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/Users/vetlewi/Desktop/python_playground/pathos/pathos/helpers/mp_helper.py", line -1, in <lambda>
File "/Users/vetlewi/Desktop/python_playground/pathos_error/simple.py", line -1, in do_something_with_inspect
File "/Users/vetlewi/.pyenv/versions/3.10.1/lib/python3.10/inspect.py", line 1678, in stack
return getouterframes(sys._getframe(1), context)
File "/Users/vetlewi/.pyenv/versions/3.10.1/lib/python3.10/inspect.py", line 1655, in getouterframes
frameinfo = (frame,) + getframeinfo(frame, context)
File "/Users/vetlewi/.pyenv/versions/3.10.1/lib/python3.10/inspect.py", line 1627, in getframeinfo
start = lineno - 1 - context//2
TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/vetlewi/Desktop/python_playground/pathos_error/simple.py", line 11, in <module>
for r in iterator:
File "/Users/vetlewi/.pyenv/versions/3.10.1/envs/pathosbug/lib/python3.10/site-packages/multiprocess/pool.py", line 870, in next
raise value
TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
This is with python 3.10.1 and pathos HEAD
Interesting! Does the same bug occur with vanilla multiprocessing without Pathos? At least we now know that inspection does not play nice with Pathos.
Interesting! Does the same bug occur with vanilla multiprocessing without Pathos? At least we now know that inspection does not play nice with Pathos.
Vanilla works great:
from pathos.multiprocessing import ProcessPool
import inspect
def do_something_with_inspect(count):
count *= 2
r = inspect.stack()
return count
pool = ProcessPool(nodes=2)
iterator = pool.imap(do_something_with_inspect, range(4))
for r in iterator:
print(r)
pool.close()
pool.join()
pool.clear()
% python simple_python.py
0
2
4
6
Great! For a temporary fix we can switch to Vanilla.
Great! For a temporary fix we can switch to Vanilla.
I'm not sure if that is a simple fix unfortunately. Using vanilla I get the error:
2022-01-05 16:03:07,606 - ompy.ensemble_normalizer - INFO - Start normalization with 2 cpus
0%
0/2 [00:00<?, ?it/s]
---------------------------------------------------------------------------
PicklingError Traceback (most recent call last)
/var/folders/cb/ly7f8x1n48q9lkmqt6c6lyv00000gn/T/ipykernel_90525/1567711863.py in <module>
----> 1 ensembleNorm.normalize()
~/Git_Repositories/vetlewi/ompy/ompy/ensemble_normalizer.py in normalize(self)
120 N = len(nlds)
121 iterator = pool.imap(self.step_wrapper, zip(range(N), zip(nlds, gsfs)))
--> 122 self.res = list(tqdm(iterator, total=N))
123 pool.close()
124 pool.join()
~/.pyenv/versions/3.10.1/lib/python3.10/site-packages/tqdm/notebook.py in __iter__(self)
255 def __iter__(self):
256 try:
--> 257 for obj in super(tqdm_notebook, self).__iter__():
258 # return super(tqdm...) will not catch exception
259 yield obj
~/.pyenv/versions/3.10.1/lib/python3.10/site-packages/tqdm/std.py in __iter__(self)
1178
1179 try:
-> 1180 for obj in iterable:
1181 yield obj
1182 # Update and possibly print the progressbar.
~/.pyenv/versions/3.10.1/lib/python3.10/multiprocessing/pool.py in next(self, timeout)
868 if success:
869 return value
--> 870 raise value
871
872 __next__ = next # XXX
~/.pyenv/versions/3.10.1/lib/python3.10/multiprocessing/pool.py in _handle_tasks(taskqueue, put, outqueue, pool, cache)
535 break
536 try:
--> 537 put(task)
538 except Exception as e:
539 job, idx = task[:2]
~/.pyenv/versions/3.10.1/lib/python3.10/multiprocessing/connection.py in send(self, obj)
209 self._check_closed()
210 self._check_writable()
--> 211 self._send_bytes(_ForkingPickler.dumps(obj))
212
213 def recv_bytes(self, maxlength=None):
~/.pyenv/versions/3.10.1/lib/python3.10/multiprocessing/reduction.py in dumps(cls, obj, protocol)
49 def dumps(cls, obj, protocol=None):
50 buf = io.BytesIO()
---> 51 cls(buf, protocol).dump(obj)
52 return buf.getbuffer()
53
PicklingError: Can't pickle <function const_temperature at 0x12c89f370>: attribute lookup const_temperature on ompy.normalizer_nld failed
This seems to be just another rabbit hole:(
For now I will wrap everything in an exception block and fall back to single threaded if multiprocessing fails.
There seems to be a bug in the
self_if_none()
function when ever it is used in subprocesses in python 3.10.1 The issue doesn't occur in 3.9.6This is the error I'm getting: