The multiprocessing and Spark backends fail when torch is not installed, even for non-ML pipelines.
.venv/lib/python3.9/site-packages/confit/registry.py:332: in wrapper_function
raise e.with_traceback(remove_lib_from_traceback(e.__traceback__))
pydantic/decorator.py:206: in pydantic.decorator.ValidatedFunction.execute
???
edsnlp/data/parquet.py:283: in write_parquet
return data.write(
edsnlp/core/lazy_collection.py:210: in write
return lc.execute() if execute else lc
edsnlp/core/lazy_collection.py:241: in execute
return execute(self)
edsnlp/processing/multiprocessing.py:643: in execute_multiprocessing_backend
with tempfile.NamedTemporaryFile(delete=False) as fp:
> dump(lc.worker_copy(), fp)
E NameError: name 'dump' is not defined
NameError
Additionally, once the above error is fixed, pickling issues appear:
venv/lib/python3.7/site-packages/dill/_dill.py:412: PicklingWarning: Cannot locate reference to <class 'edsnlp.core.registry.registry'>.
StockPickler.save(self, obj, save_persistent_id)
venv/lib/python3.7/site-packages/dill/_dill.py:412: PicklingWarning: Cannot pickle <class 'edsnlp.core.registry.registry'>: edsnlp.core.registry.registry has recursive self-references that trigger a RecursionError.
StockPickler.save(self, obj, save_persistent_id)
Process ForkProcess-1:
Process ForkProcess-2:
Process ForkProcess-3:
Process ForkProcess-4:
Traceback (most recent call last):
Traceback (most recent call last):
File "/export/home/acohen/.user_conda/miniconda/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/export/home/acohen/.user_conda/miniconda/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/export/home/acohen/.user_conda/miniconda/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
File "/export/home/acohen/.user_conda/miniconda/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 425, in run
self._run()
File "/export/home/acohen/.user_conda/miniconda/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 343, in _run
lc = load(self.lazy_collection_path, map_location=self.device)
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 425, in run
self._run()
File "/export/home/acohen/.user_conda/miniconda/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 202, in load
return dill.load(f, *args, **kwargs)
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 343, in _run
lc = load(self.lazy_collection_path, map_location=self.device)
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 425, in run
self._run()
File "/export/home/acohen/.user_conda/miniconda/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "venv/lib/python3.7/site-packages/dill/_dill.py", line 287, in load
return Unpickler(file, ignore=ignore, **kwds).load()
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 202, in load
return dill.load(f, *args, **kwargs)
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 343, in _run
lc = load(self.lazy_collection_path, map_location=self.device)
File "/export/home/acohen/.user_conda/miniconda/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "venv/lib/python3.7/site-packages/dill/_dill.py", line 442, in load
obj = StockUnpickler.load(self)
File "venv/lib/python3.7/site-packages/dill/_dill.py", line 287, in load
return Unpickler(file, ignore=ignore, **kwds).load()
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 202, in load
return dill.load(f, *args, **kwargs)
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 425, in run
self._run()
File "venv/lib/python3.7/site-packages/confit/registry.py", line 524, in __setattr__
assert isinstance(value, Registry)
File "venv/lib/python3.7/site-packages/dill/_dill.py", line 287, in load
return Unpickler(file, ignore=ignore, **kwds).load()
File "venv/lib/python3.7/site-packages/dill/_dill.py", line 442, in load
obj = StockUnpickler.load(self)
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 343, in _run
lc = load(self.lazy_collection_path, map_location=self.device)
AssertionError
File "venv/lib/python3.7/site-packages/dill/_dill.py", line 442, in load
obj = StockUnpickler.load(self)
File "venv/lib/python3.7/site-packages/confit/registry.py", line 524, in __setattr__
assert isinstance(value, Registry)
File "venv/lib/python3.7/site-packages/edsnlp/processing/multiprocessing.py", line 202, in load
return dill.load(f, *args, **kwargs)
File "venv/lib/python3.7/site-packages/confit/registry.py", line 524, in __setattr__
assert isinstance(value, Registry)
AssertionError
File "venv/lib/python3.7/site-packages/dill/_dill.py", line 287, in load
return Unpickler(file, ignore=ignore, **kwds).load()
AssertionError
File "venv/lib/python3.7/site-packages/dill/_dill.py", line 442, in load
obj = StockUnpickler.load(self)
File "venv/lib/python3.7/site-packages/confit/registry.py", line 524, in __setattr__
assert isinstance(value, Registry)
AssertionError
Related to #242
Description
The multiprocessing and Spark backends fail when torch is not installed, even for non-ML pipelines. Additionally, once that error is fixed, pickling issues appear.