STOmics / Stereopy

A toolkit for spatial transcriptomic analysis.
MIT License

AttributeError with spatial_hotspot when multiprocessing #145

Closed playgamemy closed 1 year ago

playgamemy commented 1 year ago

Following the tutorial here for spatial hotspot analysis: https://stereopy.readthedocs.io/en/latest/Tutorials/Spatial_Hotspot.html

# analysis of spatial hotspot (with multiprocessing)
data.tl.spatial_hotspot(
    use_highly_genes=True,
    use_raw=True,
    hvg_res_key='highly_variable_genes',
    model='normal',
    n_neighbors=30,
    n_jobs=20,
    fdr_threshold=0.05,
    min_gene_threshold=10,
    res_key='spatial_hotspot',
)

Any n_jobs > 1 leads to the error below; the function runs properly with n_jobs=1. This appears to be a bug in the "hotspotsc" package that is due to be fixed: https://github.com/YosefLab/Hotspot/pull/26

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[4], line 2
      1 #  analysis of spatial hotspot (with multiprocess)
----> 2 data.tl.spatial_hotspot(
      3                     use_highly_genes=True,
      4                     use_raw=True,
      5                     hvg_res_key='highly_variable_genes',
      6                     model='normal',
      7                     n_neighbors=30,
      8                     n_jobs=2,
      9                     fdr_threshold=0.05,
     10                     min_gene_threshold=10,
     11                     res_key='spatial_hotspot',
     12                     )

File ~\miniconda3\envs\st\lib\site-packages\stereo\core\st_pipeline.py:39, in logit.<locals>.wrapped(*args, **kwargs)
     37 logger.info('start to run {}...'.format(func.__name__))
     38 tk = tc.start()
---> 39 res = func(*args, **kwargs)
     40 logger.info('{} end, consume time {:.4f}s.'.format(func.__name__, tc.get_time_consumed(key=tk, restart=False)))
     41 return res

File ~\miniconda3\envs\st\lib\site-packages\stereo\core\st_pipeline.py:1061, in StPipeline.spatial_hotspot(self, use_highly_genes, hvg_res_key, model, n_neighbors, n_jobs, fdr_threshold, min_gene_threshold, outdir, res_key, use_raw)
   1059     highly_genes_name = df.index[df['highly_variable']]
   1060     data = data.sub_by_name(gene_name=highly_genes_name)
-> 1061 hs = spatial_hotspot(data, model=model, n_neighbors=n_neighbors, n_jobs=n_jobs, fdr_threshold=fdr_threshold,
   1062                      min_gene_threshold=min_gene_threshold, outdir=outdir)
   1063 # res = {"results":hs.results, "local_cor_z": hs.local_correlation_z, "modules": hs.modules,
   1064 #        "module_scores": hs.module_scores}
   1065 self.result[res_key] = hs

File ~\miniconda3\envs\st\lib\site-packages\stereo\algorithm\spatial_hotspot.py:55, in spatial_hotspot(data, model, n_neighbors, n_jobs, fdr_threshold, min_gene_threshold, outdir)
     51 hs.create_knn_graph(
     52     weighted_graph=False, n_neighbors=n_neighbors,
     53 )
     54 logger.info('Start compute_autocorrelations.')
---> 55 hs_results = hs.compute_autocorrelations(jobs=n_jobs)
     56 # select the genes with significant spatial autocorrelation
     57 hs_genes = hs_results.index[hs_results.FDR < fdr_threshold]

File ~\miniconda3\envs\st\lib\site-packages\hotspot\hotspot.py:436, in Hotspot.compute_autocorrelations(self, jobs)
    412 def compute_autocorrelations(self, jobs=1):
    413     """Perform feature selection using local autocorrelation
    414 
    415     In addition to returning output, this also stores the output
   (...)
    434 
    435     """
--> 436     return self._compute_hotspot(jobs)

File ~\miniconda3\envs\st\lib\site-packages\hotspot\hotspot.py:397, in Hotspot._compute_hotspot(self, jobs)
    370 def _compute_hotspot(self, jobs=1):
    371     """Perform feature selection using local autocorrelation
    372 
    373     In addition to returning output, this also stores the output
   (...)
    394 
    395     """
--> 397     results = compute_hs(
    398         self.counts,
    399         self.neighbors,
    400         self.weights,
    401         self.umi_counts,
    402         self.model,
    403         genes=self.adata.var_names,
    404         centered=True,
    405         jobs=jobs,
    406     )
    408     self.results = results
    410     return self.results

File ~\miniconda3\envs\st\lib\site-packages\hotspot\local_stats.py:206, in compute_hs(counts, neighbors, weights, num_umi, model, genes, centered, jobs)
    202     g_D = D
    204 if jobs > 1:
--> 206     with multiprocessing.Pool(processes=jobs, initializer=initializer) as pool:
    208         results = list(
    209             tqdm(pool.imap(_map_fun_parallel, data_iter()), total=counts.shape[0])
    210         )
    211 else:

File ~\miniconda3\envs\st\lib\multiprocessing\context.py:119, in BaseContext.Pool(self, processes, initializer, initargs, maxtasksperchild)
    117 '''Returns a process pool object'''
    118 from .pool import Pool
--> 119 return Pool(processes, initializer, initargs, maxtasksperchild,
    120             context=self.get_context())

File ~\miniconda3\envs\st\lib\multiprocessing\pool.py:212, in Pool.__init__(self, processes, initializer, initargs, maxtasksperchild, context)
    210 self._processes = processes
    211 try:
--> 212     self._repopulate_pool()
    213 except Exception:
    214     for p in self._pool:

File ~\miniconda3\envs\st\lib\multiprocessing\pool.py:303, in Pool._repopulate_pool(self)
    302 def _repopulate_pool(self):
--> 303     return self._repopulate_pool_static(self._ctx, self.Process,
    304                                         self._processes,
    305                                         self._pool, self._inqueue,
    306                                         self._outqueue, self._initializer,
    307                                         self._initargs,
    308                                         self._maxtasksperchild,
    309                                         self._wrap_exception)

File ~\miniconda3\envs\st\lib\multiprocessing\pool.py:326, in Pool._repopulate_pool_static(ctx, Process, processes, pool, inqueue, outqueue, initializer, initargs, maxtasksperchild, wrap_exception)
    324 w.name = w.name.replace('Process', 'PoolWorker')
    325 w.daemon = True
--> 326 w.start()
    327 pool.append(w)
    328 util.debug('added worker')

File ~\miniconda3\envs\st\lib\multiprocessing\process.py:121, in BaseProcess.start(self)
    118 assert not _current_process._config.get('daemon'), \
    119        'daemonic processes are not allowed to have children'
    120 _cleanup()
--> 121 self._popen = self._Popen(self)
    122 self._sentinel = self._popen.sentinel
    123 # Avoid a refcycle if the target function holds an indirect
    124 # reference to the process object (see bpo-30775)

File ~\miniconda3\envs\st\lib\multiprocessing\context.py:327, in SpawnProcess._Popen(process_obj)
    324 @staticmethod
    325 def _Popen(process_obj):
    326     from .popen_spawn_win32 import Popen
--> 327     return Popen(process_obj)

File ~\miniconda3\envs\st\lib\multiprocessing\popen_spawn_win32.py:93, in Popen.__init__(self, process_obj)
     91 try:
     92     reduction.dump(prep_data, to_child)
---> 93     reduction.dump(process_obj, to_child)
     94 finally:
     95     set_spawning_popen(None)

File ~\miniconda3\envs\st\lib\multiprocessing\reduction.py:60, in dump(obj, file, protocol)
     58 def dump(obj, file, protocol=None):
     59     '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60     ForkingPickler(file, protocol).dump(obj)

AttributeError: Can't pickle local object 'compute_hs.<locals>.initializer'
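
For context, the failure is reproducible outside Stereopy: hotspot's compute_hs defines its Pool initializer as a local function, and on Windows multiprocessing defaults to the "spawn" start method, which must pickle the initializer to send it to each worker. Below is a minimal sketch of the same pattern; the names are illustrative, not hotspot's actual code.

import multiprocessing


def compute_sketch():
    def initializer():  # local function, same shape as compute_hs.<locals>.initializer
        pass

    # Under "spawn" (the Windows default), creating the Pool pickles each
    # worker process, including the initializer; a local function cannot be
    # pickled, so this raises:
    #   AttributeError: Can't pickle local object 'compute_sketch.<locals>.initializer'
    with multiprocessing.Pool(processes=2, initializer=initializer) as pool:
        print(list(pool.imap(abs, [1, -2, 3])))


if __name__ == "__main__":
    # force "spawn" so the failure also reproduces on Linux, where "fork" is the default
    multiprocessing.set_start_method("spawn")
    compute_sketch()
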
tanliwei-coder commented 1 year ago

Hi!!

I think you must have run it on Windows. You can set n_jobs to 1 to avoid this error temporarily. I also found a PR on the Hotspot git repo in which this bug has been fixed; the PR has been merged to master, but the package has not yet been released, so we cannot upgrade directly via pip install. If you want to run with multiprocessing, you can install Hotspot from source or run it on Linux.
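
As a quick check of why the OS matters: "fork" (the Linux default) clones the parent process and never pickles the initializer, while "spawn" (the Windows default) does. A small sketch of that check plus the temporary single-process workaround, assuming data is the StereoExpData object from the snippet above:

import multiprocessing

# "fork" on Linux (local initializers work), "spawn" on Windows (they don't)
print(multiprocessing.get_start_method())

# temporary workaround until the fixed hotspotsc is released: run single-process
data.tl.spatial_hotspot(
    use_highly_genes=True,
    use_raw=True,
    hvg_res_key='highly_variable_genes',
    model='normal',
    n_neighbors=30,
    n_jobs=1,  # single process, so nothing needs to be pickled
    fdr_threshold=0.05,
    min_gene_threshold=10,
    res_key='spatial_hotspot',
)
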

tanliwei-coder commented 1 year ago

@playgamemy

You can install the latest version of Hotspot by running pip install --no-deps git+https://github.com/YosefLab/Hotspot.git; you need to uninstall the currently installed one beforehand.
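
To confirm which build is active after reinstalling, a small check (assuming the distribution keeps its PyPI name hotspotsc when installed from git):

from importlib.metadata import version

# a git install reports the version from the repo's own setup metadata,
# which you can compare against the last PyPI release
print(version("hotspotsc"))
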

I tried running it and the error no longer occurred.