The pull request regarding this issue will address two items:
Appropriately passing each embed method's keyword arguments at the embed method's definition
Addition of n_jobs to the embed(...) function so that a user can easily specify the resources they have available.
Currently, specifying method_args to kmers.embed(..., method_args) will result in an error because these arguments are being passed at the embed method's invocation. Instead, these arguments should be supplied in the embed method's definition prior to invocation.
:bug: Specifying method_args for a respective method does not actually pass these arguments to the method. This should be changed to specify these args in each of the embed method functions prior to being called by the dispatcher:
Current:
try:
X = dispatcher[method](**method_args)
except ValueError as err:
if method == "sksne":
logger.warning(
f"embed_dimensions ({embed_dimensions}) is too high for sksne. Reducing to 3."
)
embed_dimensions = 3
X = dispatcher[method](**method_args)
else:
raise err
Suggested Change
:fire: Remove (**method_args) from dispatcher[method]
try:
X = dispatcher[method]()
except ValueError as err:
if method == "sksne":
logger.warning(
f"embed_dimensions ({embed_dimensions}) is too high for sksne. Reducing to 3."
)
embed_dimensions = 3
X = dispatcher[method]()
else:
raise err
Embedding error output
```bash
automappa-celery-1 | [2022-05-04 18:20:42,595: ERROR/ForkPoolWorker-1] Task automappa.tasks.embed_kmer[1f659b41-87de-4f74-8627-bbb50d48c131] raised unexpected: PermissionError(13, 'Permission denied')
automappa-celery-1 | Traceback (most recent call last):
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/celery/app/trace.py", line 451, in trace_task
automappa-celery-1 | R = retval = fun(*args, **kwargs)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/celery/app/trace.py", line 734, in __protected_call__
automappa-celery-1 | return self.run(*args, **kwargs)
automappa-celery-1 | File "/usr/src/app/automappa/tasks.py", line 102, in embed_kmer
automappa-celery-1 | embed_df = kmers.embed(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/autometa/common/kmers.py", line 595, in embed
automappa-celery-1 | X = dispatcher[method](**method_args)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/autometa/common/kmers.py", line 574, in do_UMAP
automappa-celery-1 | return UMAP(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/umap/umap_.py", line 2772, in fit_transform
automappa-celery-1 | self.fit(X, y)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/umap/umap_.py", line 2516, in fit
automappa-celery-1 | ) = nearest_neighbors(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/umap/umap_.py", line 328, in nearest_neighbors
automappa-celery-1 | knn_search_index = NNDescent(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/pynndescent/pynndescent_.py", line 782, in __init__
automappa-celery-1 | leaf_array = rptree_leaf_array(self._rp_forest)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/pynndescent/rp_trees.py", line 1033, in rptree_leaf_array
automappa-celery-1 | return np.vstack(rptree_leaf_array_parallel(rp_forest))
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/pynndescent/rp_trees.py", line 1025, in rptree_leaf_array_parallel
automappa-celery-1 | result = joblib.Parallel(n_jobs=-1, require="sharedmem")(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/parallel.py", line 1056, in __call__
automappa-celery-1 | self.retrieve()
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/parallel.py", line 935, in retrieve
automappa-celery-1 | self._output.extend(job.get(timeout=self.timeout))
automappa-celery-1 | File "/opt/conda/lib/python3.9/multiprocessing/pool.py", line 771, in get
automappa-celery-1 | raise self._value
automappa-celery-1 | File "/opt/conda/lib/python3.9/multiprocessing/pool.py", line 125, in worker
automappa-celery-1 | result = (True, func(*args, **kwds))
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 595, in __call__
automappa-celery-1 | return self.func(*args, **kwargs)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
automappa-celery-1 | return [func(*args, **kwargs)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
automappa-celery-1 | return [func(*args, **kwargs)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/dispatcher.py", line 439, in _compile_for_args
automappa-celery-1 | raise e
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/dispatcher.py", line 372, in _compile_for_args
automappa-celery-1 | return_val = self.compile(tuple(argtypes))
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/dispatcher.py", line 915, in compile
automappa-celery-1 | self._cache.save_overload(sig, cres)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/caching.py", line 661, in save_overload
automappa-celery-1 | self._save_overload(sig, data)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/caching.py", line 668, in _save_overload
automappa-celery-1 | self._impl.locator.ensure_cache_path()
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/caching.py", line 120, in ensure_cache_path
automappa-celery-1 | tempfile.TemporaryFile(dir=path).close()
automappa-celery-1 | File "/opt/conda/lib/python3.9/tempfile.py", line 618, in TemporaryFile
automappa-celery-1 | (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags, output_type)
automappa-celery-1 | File "/opt/conda/lib/python3.9/tempfile.py", line 255, in _mkstemp_inner
automappa-celery-1 | fd = _os.open(file, flags, 0o600)
automappa-celery-1 | PermissionError: [Errno 13] Permission denied: '/opt/conda/lib/python3.9/site-packages/pynndescent/__pycache__/tmpxp8ctueo'
```
The pull request regarding this issue will address two items:
- Appropriately passing in the respective embed method's keyword arguments at the embed method's definition
- Addition of `n_jobs` to the `embed(...)` function for a user to easily specify the appropriate resources they have available to them.
Currently specifying `method_args` to `kmers.embed(..., method_args)` will result in an error as these arguments are being passed at the embed method's invocation. Instead, these arguments should be passed to the embed method's definition prior to invocation.
:bug: Specifying `method_args` for a respective method does not actually pass these arguments to the method. This should be changed to specify these args in each of the embed method functions prior to being called by the dispatcher:
Current:
Suggested Change
- Remove `(**method_args)` from `dispatcher[method]`
- Add `**method_args` to each embedding method's definition, and call the dispatcher without `**method_args`, e.g. `dispatcher[method]()`
scikit-learn BH-tSNE
BH-tSNE
UMAP (and Densmap)
TriMap
Call embedding method
Embedding error output
```bash
automappa-celery-1 | [2022-05-04 18:20:42,595: ERROR/ForkPoolWorker-1] Task automappa.tasks.embed_kmer[1f659b41-87de-4f74-8627-bbb50d48c131] raised unexpected: PermissionError(13, 'Permission denied')
automappa-celery-1 | Traceback (most recent call last):
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/celery/app/trace.py", line 451, in trace_task
automappa-celery-1 | R = retval = fun(*args, **kwargs)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/celery/app/trace.py", line 734, in __protected_call__
automappa-celery-1 | return self.run(*args, **kwargs)
automappa-celery-1 | File "/usr/src/app/automappa/tasks.py", line 102, in embed_kmer
automappa-celery-1 | embed_df = kmers.embed(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/autometa/common/kmers.py", line 595, in embed
automappa-celery-1 | X = dispatcher[method](**method_args)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/autometa/common/kmers.py", line 574, in do_UMAP
automappa-celery-1 | return UMAP(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/umap/umap_.py", line 2772, in fit_transform
automappa-celery-1 | self.fit(X, y)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/umap/umap_.py", line 2516, in fit
automappa-celery-1 | ) = nearest_neighbors(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/umap/umap_.py", line 328, in nearest_neighbors
automappa-celery-1 | knn_search_index = NNDescent(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/pynndescent/pynndescent_.py", line 782, in __init__
automappa-celery-1 | leaf_array = rptree_leaf_array(self._rp_forest)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/pynndescent/rp_trees.py", line 1033, in rptree_leaf_array
automappa-celery-1 | return np.vstack(rptree_leaf_array_parallel(rp_forest))
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/pynndescent/rp_trees.py", line 1025, in rptree_leaf_array_parallel
automappa-celery-1 | result = joblib.Parallel(n_jobs=-1, require="sharedmem")(
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/parallel.py", line 1056, in __call__
automappa-celery-1 | self.retrieve()
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/parallel.py", line 935, in retrieve
automappa-celery-1 | self._output.extend(job.get(timeout=self.timeout))
automappa-celery-1 | File "/opt/conda/lib/python3.9/multiprocessing/pool.py", line 771, in get
automappa-celery-1 | raise self._value
automappa-celery-1 | File "/opt/conda/lib/python3.9/multiprocessing/pool.py", line 125, in worker
automappa-celery-1 | result = (True, func(*args, **kwds))
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/_parallel_backends.py", line 595, in __call__
automappa-celery-1 | return self.func(*args, **kwargs)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/parallel.py", line 262, in __call__
automappa-celery-1 | return [func(*args, **kwargs)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/joblib/parallel.py", line 262, in <listcomp>
automappa-celery-1 | return [func(*args, **kwargs)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/dispatcher.py", line 439, in _compile_for_args
automappa-celery-1 | raise e
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/dispatcher.py", line 372, in _compile_for_args
automappa-celery-1 | return_val = self.compile(tuple(argtypes))
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/dispatcher.py", line 915, in compile
automappa-celery-1 | self._cache.save_overload(sig, cres)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/caching.py", line 661, in save_overload
automappa-celery-1 | self._save_overload(sig, data)
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/caching.py", line 668, in _save_overload
automappa-celery-1 | self._impl.locator.ensure_cache_path()
automappa-celery-1 | File "/opt/conda/lib/python3.9/site-packages/numba/core/caching.py", line 120, in ensure_cache_path
automappa-celery-1 | tempfile.TemporaryFile(dir=path).close()
automappa-celery-1 | File "/opt/conda/lib/python3.9/tempfile.py", line 618, in TemporaryFile
automappa-celery-1 | (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags, output_type)
automappa-celery-1 | File "/opt/conda/lib/python3.9/tempfile.py", line 255, in _mkstemp_inner
automappa-celery-1 | fd = _os.open(file, flags, 0o600)
automappa-celery-1 | PermissionError: [Errno 13] Permission denied: '/opt/conda/lib/python3.9/site-packages/pynndescent/__pycache__/tmpxp8ctueo'
```