uqfoundation / pathos

parallel graph management and execution in heterogeneous computing
http://pathos.rtfd.io
Other
1.38k stars 89 forks source link

Problem when running p.map(lambda x: ..) with pa.pools.ParallelPool() from Jupyter notebook #193

Open matiasdahl opened 4 years ago

matiasdahl commented 4 years ago

I am trying to use pathos to implement a parallel map where the function is a lambda. Strangely, the code below works from the command line, but the same code throws an OSError: could not extract source code exception when run from a Jupyter notebook.

E.g., the code below works when run from the command line (Python 3.8):

import pathos as pa
f = lambda x: x+1
p = pa.pools.ParallelPool()

print(pa.__version__)                    # 0.2.5
print(p.map(lambda x: x+1, [1,2,3]))     # outputs [2, 3, 4]

But when run from a Jupyter notebook I get the following error:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-5-6ac2d9accb86> in <module>
      4 
      5 print(pa.__version__)                    # 0.2.5
----> 6 print(p.map(lambda x: x+1, [1,2,3]))     # outputs [2, 3, 4]

/usr/local/lib/python3.8/dist-packages/pathos/parallel.py in map(self, f, *args, **kwds)
    232     def map(self, f, *args, **kwds):
    233         AbstractWorkerPool._AbstractWorkerPool__map(self, f, *args, **kwds)
--> 234         return list(self.imap(f, *args))
    235     map.__doc__ = AbstractWorkerPool.map.__doc__
    236     def imap(self, f, *args, **kwds):

/usr/local/lib/python3.8/dist-packages/pathos/parallel.py in <genexpr>(.0)
    245                 self._is_alive(None)
    246         # submit all jobs, then collect results as they become available
--> 247         return (subproc() for subproc in builtins.map(submit, *args))
    248     imap.__doc__ = AbstractWorkerPool.imap.__doc__
    249     def uimap(self, f, *args, **kwds):

/usr/local/lib/python3.8/dist-packages/pathos/parallel.py in submit(*argz)
    241            #print("using %s local workers" % _pool.get_ncpus())
    242             try:
--> 243                 return _pool.submit(f, argz, globals=globals())
    244             except pp.DestroyedServerError:
    245                 self._is_alive(None)

/usr/local/lib/python3.8/dist-packages/pp/_pp.py in submit(self, func, args, depfuncs, modules, callback, callbackargs, group, globals)
    497                 depfuncs += (arg, )
    498 
--> 499         sfunc = self.__dumpsfunc((func, ) + depfuncs, modules)
    500         sargs = pickle.dumps(args, self.__pickle_proto)
    501 

/usr/local/lib/python3.8/dist-packages/pp/_pp.py in __dumpsfunc(self, funcs, modules)
    684             # should probably just 'try' above, if fail rely on dill.dumps
    685             self.__sfuncHM[hashs] = pickle.dumps(
--> 686                     (getname(funcs[0]), sources, modules),
    687                     self.__pickle_proto)
    688         return self.__sfuncHM[hashs]

/usr/local/lib/python3.8/dist-packages/dill/source.py in getname(obj, force, fqn)
    618         name = obj.__name__
    619         if name == '<lambda>':
--> 620             return getsource(obj).split('=',1)[0].strip()
    621         # handle some special cases
    622         if module.__name__ in ['builtins','__builtin__']:

/usr/local/lib/python3.8/dist-packages/dill/source.py in getsource(object, alias, lstrip, enclosing, force, builtin)
    352     # get source lines; if fail, try to 'force' an import
    353     try: # fails for builtins, and other assorted object types
--> 354         lines, lnum = getsourcelines(object, enclosing=enclosing)
    355     except (TypeError, IOError): # failed to get source, resort to import hooks
    356         if not force: # don't try to get types that findsource can't get

/usr/local/lib/python3.8/dist-packages/dill/source.py in getsourcelines(object, lstrip, enclosing)
    323     If lstrip=True, ensure there is no indentation in the first line of code.
    324     If enclosing=True, then also return any enclosing code."""
--> 325     code, n = getblocks(object, lstrip=lstrip, enclosing=enclosing, locate=True)
    326     return code[-1], n[-1]
    327 

/usr/local/lib/python3.8/dist-packages/dill/source.py in getblocks(object, lstrip, enclosing, locate)
    249     DEPRECATED: use 'getsourcelines' instead
    250     """
--> 251     lines, lnum = findsource(object)
    252 
    253     if ismodule(object):

/usr/local/lib/python3.8/dist-packages/dill/source.py in findsource(object)
    152 
    153     if not lines:
--> 154         raise IOError('could not extract source code')
    155 
    156     #FIXME: all below may fail if exec used (i.e. exec('f = lambda x:x') )

OSError: could not extract source code

A workaround seems to be the code below. It works both in a terminal and in a Jupyter notebook (from here):

import pathos.multiprocessing as mp

p = mp.Pool(4)
p.map(lambda x: x**2, range(10))