aertslab / scenicplus

SCENIC+ is a python package to build gene regulatory networks (GRNs) using combined or separate single-cell gene expression (scRNA-seq) and single-cell chromatin accessibility (scATAC-seq) data.
Other
167 stars 27 forks source link

AttributeError: ('PyRanges object has no attribute', 'Overlap') #195

Closed pchiang5 closed 11 months ago

pchiang5 commented 11 months ago

Describe the bug

Hi,

The error shown below was raised when I ran the following code.

To Reproduce

run_pycistarget(
    region_sets = region_sets,
    species = 'homo_sapiens',
    save_path = os.path.join(work_dir, 'motifs'),
    ctx_db_path = rankings_db,
    dem_db_path = scores_db,
    path_to_motif_annotations = motif_annotation,
    run_without_promoters = True,
    n_cpu = 40,
    _temp_dir = os.path.join(tmp_dir, 'ray_spill'),
    annotation_version = 'v10nr_clust',
    )

Error output

> 2023-08-09 14:33:06,830 pycisTarget_wrapper INFO     /mnt/c/Users/pc/Downloads/motifs folder already exists.
> 2023-08-09 14:33:10,738 pycisTarget_wrapper INFO     Loading cisTarget database for topics_otsu
> 2023-08-09 14:33:10,739 cisTarget    INFO     Reading cisTarget database
> ---------------------------------------------------------------------------
> AssertionError                            Traceback (most recent call last)
> Cell In[71], line 2
>       1 from scenicplus.wrappers.run_pycistarget import run_pycistarget
> ----> 2 run_pycistarget(
>       3     region_sets = region_sets,
>       4     species = 'homo_sapiens',
>       5     save_path = os.path.join(work_dir, 'motifs'),
>       6     ctx_db_path = rankings_db,
>       7     dem_db_path = scores_db,
>       8     path_to_motif_annotations = motif_annotation,
>       9     run_without_promoters = True,
>      10     n_cpu = 40,
>      11     _temp_dir = os.path.join(tmp_dir, 'ray_spill'),
>      12     annotation_version = 'v10nr_clust',
>      13     )
> 
> File /mnt/c/Users/pc/Downloads/scenicplus/src/scenicplus/wrappers/run_pycistarget.py:182, in run_pycistarget(region_sets, species, save_path, custom_annot, save_partial, ctx_db_path, dem_db_path, run_without_promoters, biomart_host, promoter_space, ctx_auc_threshold, ctx_nes_threshold, ctx_rank_threshold, dem_log2fc_thr, dem_motif_hit_thr, dem_max_bg_regions, annotation, motif_similarity_fdr, path_to_motif_annotations, annotation_version, n_cpu, _temp_dir, exclude_motifs, exclude_collection, **kwargs)
>     180 ## CISTARGET
>     181 regions = region_sets[key]
> --> 182 ctx_db = cisTargetDatabase(ctx_db_path, regions)
>     183 if exclude_motifs is not None:
>     184     out = pd.read_csv(exclude_motifs, header=None).iloc[:,0].tolist()
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/motif_enrichment_cistarget.py:67, in cisTargetDatabase.__init__(self, fname, region_sets, name, fraction_overlap)
>      48 def __init__(self,
>      49             fname: str,
>      50             region_sets: Union[Dict[str, pr.PyRanges], pr.PyRanges] = None,
>      51             name: str = None,
>      52             fraction_overlap: float = 0.4):
>      53     """
>      54     Initialize cisTargetDatabase
>      55
>    (...)
>      65         Minimal overlap between query and regions in the database for the mapping.
>      66     """
> ---> 67     self.regions_to_db, self.db_rankings, self.total_regions = self.load_db(fname,
>      68                                                       region_sets,
>      69                                                       name,
>      70                                                       fraction_overlap)
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/motif_enrichment_cistarget.py:110, in cisTargetDatabase.load_db(self, fname, region_sets, name, fraction_overlap)
>     108 if name is None:
>     109     name = os.path.basename(fname)
> --> 110 db = FeatherRankingDatabase(fname, name=name)
>     111 total_regions = db.total_genes
>     112 db_regions = db.genes
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/ctxcore/rnkdb.py:106, in FeatherRankingDatabase.__init__(self, fname, name)
>      98 """
>      99 Create a new feather database.
>     100
>     101 :param fname: The filename of the database.
>     102 :param name: The name of the database.
>     103 """
>     104 super().__init__(name=name)
> --> 106 assert os.path.isfile(fname), """Database "{fname}" doesn't exist."""
>     108 self._fname = fname
>     109 self.ct_db = CisTargetDatabase.init_ct_db(
>     110     ct_db_filename=self._fname, engine="pyarrow"
>     111 )
> 
> AssertionError: Database "{fname}" doesn't exist.
> 
> In [72]: rankings_db
> Out[72]: '/mnt/c/Users/pc/Downloads/cluster_SCREEN.regions_vs_motifs.rankings.v2.feather'
> 
> In [73]:
>     ...: rankings_db = os.path.join(db_fpath, 'hg38_screen_v10_clust.regions_vs_motifs.rankings.feather')
>     ...: scores_db =  os.path.join(db_fpath, 'hg38_screen_v10_clust.regions_vs_motifs.scores.feather')
>     ...: motif_annotation = os.path.join(motif_annot_fpath, 'motifs-v10-nr.hgnc-m0.00001-o0.0.tbl')
>     ...:
>     ...: from scenicplus.wrappers.run_pycistarget import run_pycistarget
>     ...: run_pycistarget(
>     ...:     region_sets = region_sets,
>     ...:     species = 'homo_sapiens',
>     ...:     save_path = os.path.join(work_dir, 'motifs'),
>     ...:     ctx_db_path = rankings_db,
>     ...:     dem_db_path = scores_db,
>     ...:     path_to_motif_annotations = motif_annotation,
>     ...:     run_without_promoters = True,
>     ...:     n_cpu = 40,
>     ...:     _temp_dir = os.path.join(tmp_dir, 'ray_spill'),
>     ...:     annotation_version = 'v10nr_clust',
>     ...:     )
> 2023-08-09 14:34:50,376 pycisTarget_wrapper INFO     /mnt/c/Users/pc/Downloads/motifs folder already exists.
> 2023-08-09 14:34:51,807 pycisTarget_wrapper INFO     Loading cisTarget database for topics_otsu
> 2023-08-09 14:34:51,807 cisTarget    INFO     Reading cisTarget database
> ---------------------------------------------------------------------------
> AttributeError                            Traceback (most recent call last)
> Cell In[73], line 6
>       3 motif_annotation = os.path.join(motif_annot_fpath, 'motifs-v10-nr.hgnc-m0.00001-o0.0.tbl')
>       5 from scenicplus.wrappers.run_pycistarget import run_pycistarget
> ----> 6 run_pycistarget(
>       7     region_sets = region_sets,
>       8     species = 'homo_sapiens',
>       9     save_path = os.path.join(work_dir, 'motifs'),
>      10     ctx_db_path = rankings_db,
>      11     dem_db_path = scores_db,
>      12     path_to_motif_annotations = motif_annotation,
>      13     run_without_promoters = True,
>      14     n_cpu = 40,
>      15     _temp_dir = os.path.join(tmp_dir, 'ray_spill'),
>      16     annotation_version = 'v10nr_clust',
>      17     )
> 
> File /mnt/c/Users/pc/Downloads/scenicplus/src/scenicplus/wrappers/run_pycistarget.py:182, in run_pycistarget(region_sets, species, save_path, custom_annot, save_partial, ctx_db_path, dem_db_path, run_without_promoters, biomart_host, promoter_space, ctx_auc_threshold, ctx_nes_threshold, ctx_rank_threshold, dem_log2fc_thr, dem_motif_hit_thr, dem_max_bg_regions, annotation, motif_similarity_fdr, path_to_motif_annotations, annotation_version, n_cpu, _temp_dir, exclude_motifs, exclude_collection, **kwargs)
>     180 ## CISTARGET
>     181 regions = region_sets[key]
> --> 182 ctx_db = cisTargetDatabase(ctx_db_path, regions)
>     183 if exclude_motifs is not None:
>     184     out = pd.read_csv(exclude_motifs, header=None).iloc[:,0].tolist()
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/motif_enrichment_cistarget.py:67, in cisTargetDatabase.__init__(self, fname, region_sets, name, fraction_overlap)
>      48 def __init__(self,
>      49             fname: str,
>      50             region_sets: Union[Dict[str, pr.PyRanges], pr.PyRanges] = None,
>      51             name: str = None,
>      52             fraction_overlap: float = 0.4):
>      53     """
>      54     Initialize cisTargetDatabase
>      55
>    (...)
>      65         Minimal overlap between query and regions in the database for the mapping.
>      66     """
> ---> 67     self.regions_to_db, self.db_rankings, self.total_regions = self.load_db(fname,
>      68                                                       region_sets,
>      69                                                       name,
>      70                                                       fraction_overlap)
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/motif_enrichment_cistarget.py:119, in cisTargetDatabase.load_db(self, fname, region_sets, name, fraction_overlap)
>     117 if region_sets is not None:
>     118     if type(region_sets) == dict:
> --> 119         target_to_db_dict = {x: target_to_query(region_sets[x], list(db_regions), fraction_overlap = fraction_overlap) for x in region_sets.keys()}
>     120         target_regions_in_db = list(set(sum([target_to_db_dict[x]['Query'].tolist() for x in target_to_db_dict.keys()],[])))
>     121     elif type(region_sets) == pr.PyRanges:
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/motif_enrichment_cistarget.py:119, in <dictcomp>(.0)
>     117 if region_sets is not None:
>     118     if type(region_sets) == dict:
> --> 119         target_to_db_dict = {x: target_to_query(region_sets[x], list(db_regions), fraction_overlap = fraction_overlap) for x in region_sets.keys()}
>     120         target_regions_in_db = list(set(sum([target_to_db_dict[x]['Query'].tolist() for x in target_to_db_dict.keys()],[])))
>     121     elif type(region_sets) == pr.PyRanges:
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/utils.py:283, in target_to_query(target, query, fraction_overlap)
>     280     query_pr=query
>     282 join_pr = target_pr.join(query_pr, report_overlap = True)
> --> 283 join_pr.Overlap_query =  join_pr.Overlap/(join_pr.End_b - join_pr.Start_b)
>     284 join_pr.Overlap_target =  join_pr.Overlap/(join_pr.End - join_pr.Start)
>     285 join_pr = join_pr[(join_pr.Overlap_query > fraction_overlap) | (join_pr.Overlap_target > fraction_overlap)]
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pyranges/pyranges_main.py:265, in PyRanges.__getattr__(self, name)
>     240 """Return column.
>     241
>     242 Parameters
>    (...)
>     260 Name: Start, dtype: int64
>     261 """
>     263 from pyranges.methods.attr import _getattr
> --> 265 return _getattr(self, name)
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pyranges/methods/attr.py:65, in _getattr(self, name)
>      63     return pd.concat([df[name] for df in self.values()])
>      64 else:
> ---> 65     raise AttributeError("PyRanges object has no attribute", name)
> 
> AttributeError: ('PyRanges object has no attribute', 'Overlap')
> 

Expected behavior

The same output as shown in the tutorial:

> 
> 2022-08-05 08:53:16,277 pycisTarget_wrapper INFO     pbmc_tutorial/motifs folder already exists.
> 2022-08-05 08:53:17,650 pycisTarget_wrapper INFO     Loading cisTarget database for topics_otsu
> 2022-08-05 08:53:17,653 cisTarget    INFO     Reading cisTarget database
> 2022-08-05 09:13:51,198 pycisTarget_wrapper INFO     Running cisTarget for topics_otsu
> 
> (ctx_internal_ray pid=17049) 2022-08-05 09:14:38,091 cisTarget    INFO     Running cisTarget for Topic1 which has 4760 regions
> (ctx_internal_ray pid=17050) 2022-08-05 09:14:38,555 cisTarget    INFO     Running cisTarget for Topic2 which has 6170 regions
> (ctx_internal_ray pid=17047) 2022-08-05 09:14:38,925 cisTarget    INFO     Running cisTarget for Topic3 which has 4559 regions
> (ctx_internal_ray pid=17040) 2022-08-05 09:14:39,376 cisTarget    INFO     Running cisTarget for Topic4 which has 3273 regions
> (ctx_internal_ray pid=17043) 2022-08-05 09:14:39,819 cisTarget    INFO     Running cisTarget for Topic5 which has 2115 regions
> (ctx_internal_ray pid=17044) 2022-08-05 09:14:40,172 cisTarget    INFO     Running cisTarget for Topic6 which has 3547 regions
> (ctx_internal_ray pid=17046) 2022-08-05 09:14:40,636 cisTarget    INFO     Running cisTarget for Topic7 which has 5380 regions
> (ctx_internal_ray pid=17048) 2022-08-05 09:14:41,022 cisTarget    INFO     Running cisTarget for Topic8 which has 7775 regions
> 2022-08-05 09:16:01,882 cisTarget    INFO     Done!
> 2022-08-05 09:16:01,884 pycisTarget_wrapper INFO     pbmc_tutorial/motifs/CTX_topics_otsu_All folder already exists.
> 2022-08-05 09:16:02,212 pycisTarget_wrapper INFO     Running cisTarget without promoters for topics_otsu
> 2022-08-05 09:16:13,911 INFO services.py:1470 -- View the Ray dashboard at http://127.0.0.1:8266
> 2022-08-05 09:17:31,790 cisTarget    INFO     Done!
> 2022-08-05 09:17:31,793 pycisTarget_wrapper INFO     pbmc_tutorial/motifs/CTX_topics_otsu_No_promoters folder already exists.
> 2022-08-05 09:17:32,015 pycisTarget_wrapper INFO     Running DEM for topics_otsu
> 2022-08-05 09:17:32,017 DEM          INFO     Reading DEM database
> 

**Screenshots**
In [74]: region_sets
Out[74]:
{'topics_otsu': {'Topic1': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 16656655  | 16657155  |
  | chr1         | 206206881 | 206207381 |
  | chr1         | 108380560 | 108381060 |
  | chr1         | 16750492  | 16750992  |
  | ...          | ...       | ...       |
  | chrX         | 135051322 | 135051822 |
  | chrX         | 154547094 | 154547594 |
  | chrX         | 119574150 | 119574650 |
  | chrX         | 47193417  | 47193917  |
  | chrY         | 12861016  | 12861516  |
  | chrY         | 7297208   | 7297708   |
  | chrY         | 7603140   | 7603640   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 3,003 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic2': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 162561622 | 162562122 |
  | chr1         | 41357457  | 41357957  |
  | chr1         | 144760309 | 144760809 |
  | chr1         | 200755450 | 200755950 |
  | ...          | ...       | ...       |
  | chrY         | 9536610   | 9537110   |
  | chrY         | 9346215   | 9346715   |
  | chrY         | 13702739  | 13703239  |
  | chrY         | 7273681   | 7274181   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,906 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic3': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 120424705 | 120425205 |
  | chr1         | 145415381 | 145415881 |
  | chr1         | 149093394 | 149093894 |
  | chr1         | 146241363 | 146241863 |
  | ...          | ...       | ...       |
  | chrX         | 48911531  | 48912031  |
  | chrX         | 9462840   | 9463340   |
  | chrX         | 47556039  | 47556539  |
  | chrX         | 119880613 | 119881113 |
  | chrY         | 25491379  | 25491879  |
  | chrY         | 16162378  | 16162878  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,718 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic4': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 235504810 | 235505310 |
  | chr1         | 144801579 | 144802079 |
  | chr1         | 16703091  | 16703591  |
  | chr1         | 33361612  | 33362112  |
  | ...          | ...       | ...       |
  | chrX         | 110002342 | 110002842 |
  | chrX         | 53092989  | 53093489  |
  | chrX         | 71107846  | 71108346  |
  | chrX         | 54774481  | 54774981  |
  | chrY         | 12905364  | 12905864  |
  | chrY         | 18458978  | 18459478  |
  | chrY         | 19567297  | 19567797  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,721 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic5': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 121021500 | 121022000 |
  | chr1         | 149713721 | 149714221 |
  | chr1         | 30768965  | 30769465  |
  | chr1         | 16759506  | 16760006  |
  | ...          | ...       | ...       |
  | chrX         | 49155128  | 49155628  |
  | chrX         | 154805426 | 154805926 |
  | chrX         | 40148310  | 40148810  |
  | chrX         | 54183215  | 54183715  |
  | chrY         | 19567297  | 19567797  |
  | chrY         | 2934788   | 2935288   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 1,331 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic6': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 121233962 | 121234462 |
  | chr1         | 145045878 | 145046378 |
  | chr1         | 206252536 | 206253036 |
  | chr1         | 144021584 | 144022084 |
  | ...          | ...       | ...       |
  | chrX         | 150507469 | 150507969 |
  | chrX         | 40580346  | 40580846  |
  | chrX         | 24025083  | 24025583  |
  | chrX         | 103533597 | 103534097 |
  | chrY         | 23818892  | 23819392  |
  | chrY         | 25850701  | 25851201  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,127 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic7': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 120917352 | 120917852 |
  | chr1         | 38873229  | 38873729  |
  | chr1         | 149611453 | 149611953 |
  | chr1         | 145211126 | 145211626 |
  | ...          | ...       | ...       |
  | chrX         | 72123919  | 72124419  |
  | chrX         | 23783125  | 23783625  |
  | chrX         | 84501651  | 84502151  |
  | chrX         | 154805426 | 154805926 |
  | chrY         | 18860918  | 18861418  |
  | chrY         | 24178267  | 24178767  |
  | chrY         | 13479979  | 13480479  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 3,507 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic8': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 235328157 | 235328657 |
  | chr1         | 121184357 | 121184857 |
  | chr1         | 16513668  | 16514168  |
  | chr1         | 243255144 | 243255644 |
  | ...          | ...       | ...       |
  | chrX         | 107654923 | 107655423 |
  | chrX         | 24025083  | 24025583  |
  | chrX         | 112839820 | 112840320 |
  | chrX         | 30889153  | 30889653  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,117 rows and 3 columns from 23 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic9': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 228622335 | 228622835 |
  | chr1         | 228644706 | 228645206 |
  | chr1         | 228640234 | 228640734 |
  | chr1         | 144917062 | 144917562 |
  | ...          | ...       | ...       |
  | chrX         | 16956516  | 16957016  |
  | chrX         | 72079928  | 72080428  |
  | chrX         | 38327327  | 38327827  |
  | chrX         | 154408974 | 154409474 |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 1,835 rows and 3 columns from 23 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic10': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 143971946 | 143972446 |
  | chr1         | 145095563 | 145096063 |
  | chrMT        | 3217      | 3717      |
  | chrMT        | 13944     | 14444     |
  | ...          | ...       | ...       |
  | chrMT        | 14949     | 15449     |
  | chr1         | 143971946 | 143972446 |
  | chrMT        | 2094      | 2594      |
  | chrMT        | 13001     | 13501     |
  | chrMT        | 5002      | 5502      |
  | chrMT        | 8095      | 8595      |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 26 rows and 3 columns from 2 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic11': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 155166719 | 155167219 |
  | chr1         | 144247504 | 144248004 |
  | chr1         | 205284753 | 205285253 |
  | chr1         | 27615029  | 27615529  |
  | ...          | ...       | ...       |
  | chrX         | 137051960 | 137052460 |
  | chrX         | 154372593 | 154373093 |
  | chrX         | 119468737 | 119469237 |
  | chrX         | 101407630 | 101408130 |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,450 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic12': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 149124227 | 149124727 |
  | chr1         | 144485312 | 144485812 |
  | chr1         | 148625500 | 148626000 |
  | chr1         | 146174565 | 146175065 |
  | ...          | ...       | ...       |
  | chrX         | 102713331 | 102713831 |
  | chrX         | 74614638  | 74615138  |
  | chrX         | 48891503  | 48892003  |
  | chrX         | 154408974 | 154409474 |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 1,304 rows and 3 columns from 23 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic13': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 145066491 | 145066991 |
  | chr1         | 144000931 | 144001431 |
  | chr1         | 120015909 | 120016409 |
  | chr1         | 145017419 | 145017919 |
  | ...          | ...       | ...       |
  | chrX         | 110001789 | 110002289 |
  | chrX         | 118823553 | 118824053 |
  | chrX         | 68684348  | 68684848  |
  | chrX         | 40735510  | 40736010  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,116 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic14': +--------------+-----------+-----------+
  | Chromosome   |     Start |       End |
  | (category)   |   (int64) |   (int64) |
  |--------------+-----------+-----------|
  | chr1         | 146228991 | 146229491 |
  | chr1         | 148679736 | 148680236 |
  | chr1         | 149390073 | 149390573 |
  | chr1         | 120069620 | 120070120 |
  | chr15        |  84344489 |  84344989 |
  | chr15        |  84229918 |  84230418 |
  | chr15        |  85234563 |  85235063 |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 7 rows and 3 columns from 2 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic15': +--------------+-----------+-----------+
  | Chromosome   |     Start |       End |
  | (category)   |   (int64) |   (int64) |
  |--------------+-----------+-----------|
  | chr1         | 143803246 | 143803746 |
  | chr1         | 145157472 | 145157972 |
  | chr1         | 149711865 | 149712365 |
  | chr1         | 121019984 | 121020484 |
  | chr4         |  86594025 |  86594525 |
  | chr16        |  16315545 |  16316045 |
  | chr16        |  18352630 |  18353130 |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 7 rows and 3 columns from 3 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic16': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 145161205 | 145161705 |
  | chr1         | 143799513 | 143800013 |
  | chr1         | 121016314 | 121016814 |
  | chr1         | 149708252 | 149708752 |
  | ...          | ...       | ...       |
  | chrX         | 125492477 | 125492977 |
  | chrX         | 106862845 | 106863345 |
  | chrX         | 7148335   | 7148835   |
  | chrX         | 132022573 | 132023073 |
  | chrY         | 24325821  | 24326321  |
  | chrY         | 15624896  | 15625396  |
  | chrY         | 7272525   | 7273025   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 1,461 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.},
 'topics_top_3': {'Topic1': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 16656655  | 16657155  |
  | chr1         | 206206881 | 206207381 |
  | chr1         | 108380560 | 108381060 |
  | chr1         | 16750492  | 16750992  |
  | ...          | ...       | ...       |
  | chrX         | 135051322 | 135051822 |
  | chrX         | 154547094 | 154547594 |
  | chrX         | 119574150 | 119574650 |
  | chrX         | 47193417  | 47193917  |
  | chrY         | 12861016  | 12861516  |
  | chrY         | 7297208   | 7297708   |
  | chrY         | 7603140   | 7603640   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 3,000 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic2': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 162561622 | 162562122 |
  | chr1         | 41357457  | 41357957  |
  | chr1         | 144760309 | 144760809 |
  | chr1         | 200755450 | 200755950 |
  | ...          | ...       | ...       |
  | chrY         | 9536610   | 9537110   |
  | chrY         | 9346215   | 9346715   |
  | chrY         | 13702739  | 13703239  |
  | chrY         | 7273681   | 7274181   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,906 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic3': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 120424705 | 120425205 |
  | chr1         | 145415381 | 145415881 |
  | chr1         | 149093394 | 149093894 |
  | chr1         | 146241363 | 146241863 |
  | ...          | ...       | ...       |
  | chrX         | 48911531  | 48912031  |
  | chrX         | 9462840   | 9463340   |
  | chrX         | 47556039  | 47556539  |
  | chrX         | 119880613 | 119881113 |
  | chrY         | 25491379  | 25491879  |
  | chrY         | 16162378  | 16162878  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,718 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic4': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 235504810 | 235505310 |
  | chr1         | 144801579 | 144802079 |
  | chr1         | 16703091  | 16703591  |
  | chr1         | 33361612  | 33362112  |
  | ...          | ...       | ...       |
  | chrX         | 110002342 | 110002842 |
  | chrX         | 53092989  | 53093489  |
  | chrX         | 71107846  | 71108346  |
  | chrX         | 54774481  | 54774981  |
  | chrY         | 12905364  | 12905864  |
  | chrY         | 18458978  | 18459478  |
  | chrY         | 19567297  | 19567797  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,722 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic5': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 121021500 | 121022000 |
  | chr1         | 149713721 | 149714221 |
  | chr1         | 30768965  | 30769465  |
  | chr1         | 16759506  | 16760006  |
  | ...          | ...       | ...       |
  | chrX         | 155216039 | 155216539 |
  | chrX         | 49191116  | 49191616  |
  | chrX         | 48911531  | 48912031  |
  | chrX         | 153669010 | 153669510 |
  | chrY         | 19567297  | 19567797  |
  | chrY         | 2934788   | 2935288   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,464 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic6': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 121233962 | 121234462 |
  | chr1         | 145045878 | 145046378 |
  | chr1         | 206252536 | 206253036 |
  | chr1         | 144021584 | 144022084 |
  | ...          | ...       | ...       |
  | chrX         | 129785376 | 129785876 |
  | chrX         | 115900592 | 115901092 |
  | chrX         | 112845761 | 112846261 |
  | chrX         | 111112916 | 111113416 |
  | chrY         | 23818892  | 23819392  |
  | chrY         | 25850701  | 25851201  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,749 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic7': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 120917352 | 120917852 |
  | chr1         | 38873229  | 38873729  |
  | chr1         | 149611453 | 149611953 |
  | chr1         | 121237933 | 121238433 |
  | ...          | ...       | ...       |
  | chrX         | 101097353 | 101097853 |
  | chrX         | 107676819 | 107677319 |
  | chrX         | 2895397   | 2895897   |
  | chrX         | 122113215 | 122113715 |
  | chrY         | 18860918  | 18861418  |
  | chrY         | 24178267  | 24178767  |
  | chrY         | 13479979  | 13480479  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,998 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic8': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 235328157 | 235328657 |
  | chr1         | 121184357 | 121184857 |
  | chr1         | 16513668  | 16514168  |
  | chr1         | 243255144 | 243255644 |
  | ...          | ...       | ...       |
  | chrX         | 154547094 | 154547594 |
  | chrX         | 66942983  | 66943483  |
  | chrX         | 123959830 | 123960330 |
  | chrX         | 110229877 | 110230377 |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,999 rows and 3 columns from 23 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic9': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 228622335 | 228622835 |
  | chr1         | 228640234 | 228640734 |
  | chr1         | 228644706 | 228645206 |
  | chr1         | 228638006 | 228638506 |
  | ...          | ...       | ...       |
  | chrX         | 69073402  | 69073902  |
  | chrX         | 71871125  | 71871625  |
  | chrX         | 119120996 | 119121496 |
  | chrX         | 120604734 | 120605234 |
  | chrY         | 20616247  | 20616747  |
  | chrY         | 9818580   | 9819080   |
  | chrY         | 19550128  | 19550628  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,922 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic10': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 143971946 | 143972446 |
  | chr1         | 145095563 | 145096063 |
  | chr1         | 206203139 | 206203639 |
  | chr1         | 121184357 | 121184857 |
  | ...          | ...       | ...       |
  | chrX         | 135521462 | 135521962 |
  | chrX         | 47144769  | 47145269  |
  | chrX         | 153470465 | 153470965 |
  | chrX         | 47361945  | 47362445  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,998 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic11': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 155166719 | 155167219 |
  | chr1         | 144247504 | 144248004 |
  | chr1         | 205284753 | 205285253 |
  | chr1         | 27615029  | 27615529  |
  | ...          | ...       | ...       |
  | chrX         | 19670375  | 19670875  |
  | chrX         | 9981625   | 9982125   |
  | chrX         | 86002669  | 86003169  |
  | chrX         | 129605782 | 129606282 |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,852 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic12': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 149124227 | 149124727 |
  | chr1         | 144485312 | 144485812 |
  | chr1         | 148625500 | 148626000 |
  | chr1         | 146174565 | 146175065 |
  | ...          | ...       | ...       |
  | chrX         | 52958493  | 52958993  |
  | chrX         | 9464380   | 9464880   |
  | chrX         | 153333777 | 153334277 |
  | chrX         | 23229039  | 23229539  |
  | chrY         | 25494512  | 25495012  |
  | chrY         | 24175095  | 24175595  |
  | chrY         | 25463463  | 25463963  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,371 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic13': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 145066491 | 145066991 |
  | chr1         | 144000931 | 144001431 |
  | chr1         | 120015909 | 120016409 |
  | chr1         | 145017419 | 145017919 |
  | ...          | ...       | ...       |
  | chrX         | 114924615 | 114925115 |
  | chrX         | 13869576  | 13870076  |
  | chrX         | 154323337 | 154323837 |
  | chrX         | 24135464  | 24135964  |
  | chrY         | 3415771   | 3416271   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,751 rows and 3 columns from 25 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic14': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 146228991 | 146229491 |
  | chr1         | 148679736 | 148680236 |
  | chr1         | 149390073 | 149390573 |
  | chr1         | 120069620 | 120070120 |
  | ...          | ...       | ...       |
  | chrX         | 118345545 | 118346045 |
  | chrX         | 48891503  | 48892003  |
  | chrX         | 104165954 | 104166454 |
  | chrX         | 49155835  | 49156335  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,585 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic15': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 143803246 | 143803746 |
  | chr1         | 145157472 | 145157972 |
  | chr1         | 149711865 | 149712365 |
  | chr1         | 121019984 | 121020484 |
  | ...          | ...       | ...       |
  | chrX         | 11189160  | 11189660  |
  | chrX         | 24791741  | 24792241  |
  | chrX         | 34559228  | 34559728  |
  | chrX         | 38518704  | 38519204  |
  | chrY         | 25479257  | 25479757  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,584 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'Topic16': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 145161205 | 145161705 |
  | chr1         | 143799513 | 143800013 |
  | chr1         | 121016314 | 121016814 |
  | chr1         | 149708252 | 149708752 |
  | ...          | ...       | ...       |
  | chrY         | 7272525   | 7273025   |
  | chrY         | 15624896  | 15625396  |
  | chrY         | 24325821  | 24326321  |
  | chrY         | 19066862  | 19067362  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 2,903 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.},
 'DARs': {'L0': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 144567335 | 144567835 |
  | chr1         | 146156263 | 146156763 |
  | chr1         | 109397821 | 109398321 |
  | chr1         | 6148339   | 6148839   |
  | ...          | ...       | ...       |
  | chrX         | 41085992  | 41086492  |
  | chrX         | 15675087  | 15675587  |
  | chrX         | 129843775 | 129844275 |
  | chrX         | 154411249 | 154411749 |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 10,013 rows and 3 columns from 23 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'L1': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 155166719 | 155167219 |
  | chr1         | 247415788 | 247416288 |
  | chr1         | 108458450 | 108458950 |
  | chr1         | 149465210 | 149465710 |
  | ...          | ...       | ...       |
  | chrY         | 25850701  | 25851201  |
  | chrY         | 8298840   | 8299340   |
  | chrY         | 7312082   | 7312582   |
  | chrY         | 23818892  | 23819392  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 9,358 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'L2': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 30768965  | 30769465  |
  | chr1         | 200154266 | 200154766 |
  | chr1         | 145066491 | 145066991 |
  | chr1         | 161523462 | 161523962 |
  | ...          | ...       | ...       |
  | chrY         | 7297208   | 7297708   |
  | chrY         | 12861016  | 12861516  |
  | chrY         | 25479257  | 25479757  |
  | chrY         | 25477300  | 25477800  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 18,969 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'L3': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 121021500 | 121022000 |
  | chr1         | 30768965  | 30769465  |
  | chr1         | 200154266 | 200154766 |
  | chr1         | 145155792 | 145156292 |
  | ...          | ...       | ...       |
  | chrY         | 25463463  | 25463963  |
  | chrY         | 25494512  | 25495012  |
  | chrY         | 13479979  | 13480479  |
  | chrY         | 18458978  | 18459478  |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 16,474 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.,
  'L4': +--------------+-----------+-----------+
  | Chromosome   | Start     | End       |
  | (category)   | (int64)   | (int64)   |
  |--------------+-----------+-----------|
  | chr1         | 228622335 | 228622835 |
  | chr1         | 228640234 | 228640734 |
  | chr1         | 228644706 | 228645206 |
  | chr1         | 228608937 | 228609437 |
  | ...          | ...       | ...       |
  | chrY         | 23818892  | 23819392  |
  | chrY         | 5000424   | 5000924   |
  | chrY         | 4056001   | 4056501   |
  | chrY         | 7603140   | 7603640   |
  +--------------+-----------+-----------+
  Unstranded PyRanges object has 7,627 rows and 3 columns from 24 chromosomes.
  For printing, the PyRanges was sorted on Chromosome.}}

Version (please complete the following information):

Additional context Add any other context about the problem here.

pchiang5 commented 11 months ago

I could proceed a bit by removing nonoverlapping topics:

from ctxcore.rnkdb import FeatherRankingDatabase
from pycistarget.utils import target_to_query

# Region names stored in the cisTarget rankings database; converted to a list
# once, outside the loop, because it does not change between topics.
db = FeatherRankingDatabase(rankings_db, name='homo_sapiens')
db_regions = list(db.genes)

# Try to map every topic onto the database regions and record the topics for
# which the mapping raises. Only the success/failure matters here, so the
# mapping result itself is discarded.
failed_topics = []
for topic, topic_regions in region_sets['topics_otsu'].items():
    try:
        target_to_query(topic_regions, db_regions, fraction_overlap=0.4)
    except Exception:
        # `except Exception` instead of a bare `except:` so that Ctrl-C
        # (KeyboardInterrupt) still stops the loop.
        print(topic)
        failed_topics.append(topic)
# Topic10
# Topic14

# Remove the failing topics (Topic10 and Topic14 in this run). Done after the
# loop because a dict must not change size while it is being iterated.
for topic in failed_topics:
    del region_sets['topics_otsu'][topic]

However, the same error appeared below:

from scenicplus.wrappers.run_pycistarget import run_pycistarget
run_pycistarget(
    region_sets = region_sets,  # 'topics_otsu' no longer contains Topic10 and Topic14 here
    species = 'homo_sapiens',
    save_path = os.path.join(work_dir, 'motifs'),
    ctx_db_path = rankings_db,
    dem_db_path = scores_db,
    path_to_motif_annotations = motif_annotation,
    run_without_promoters = True,  # the log shows an extra "cisTarget without promoters" pass; that pass is where this run fails
    n_cpu = 40,
    # _temp_dir = os.path.join(tmp_dir, 'ray_spill'),
    annotation_version = 'v10nr_clust',
    )
> 2023-08-09 15:07:42,212 pycisTarget_wrapper INFO     /mnt/c/Users/pc/Downloads/motifs folder already exists.
> 2023-08-09 15:07:43,496 pycisTarget_wrapper INFO     Loading cisTarget database for topics_otsu
> 2023-08-09 15:07:43,497 cisTarget    INFO     Reading cisTarget database
> 2023-08-09 15:10:07,267 pycisTarget_wrapper INFO     Running cisTarget for topics_otsu
> 2023-08-09 15:10:45,577 INFO worker.py:1612 -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8272
> (ctx_internal_ray pid=1149855) 2023-08-09 15:10:55,864 cisTarget    INFO     Running cisTarget for Topic1 which has 2820 regions
> (ctx_internal_ray pid=1149877) 2023-08-09 15:11:01,083 cisTarget    INFO     Running cisTarget for Topic15 which has 1 regions [repeated 12x across cluster]
> (ctx_internal_ray pid=1149860) 2023-08-09 15:13:25,800 cisTarget    INFO     Running cisTarget for Topic15 which has 1 regions [repeated 2x across cluster]
> (ctx_internal_ray pid=1149883) 2023-08-09 15:13:30,649 cisTarget    INFO     Annotating motifs for Topic2
> (ctx_internal_ray pid=1149883) 2023-08-09 15:13:30,658 cisTarget    INFO     Unable to load annotation for homo_sapiens
> (ctx_internal_ray pid=1149859) 2023-08-09 15:13:27,286 cisTarget    INFO     Running cisTarget for Topic13 which has 2373 regions [repeated 2x across cluster]
> (ctx_internal_ray pid=1149883) 2023-08-09 15:13:31,224 cisTarget    INFO     Getting cistromes for Topic2
> (ctx_internal_ray pid=1149879) 2023-08-09 15:13:35,256 cisTarget    INFO     Annotating motifs for Topic12 [repeated 9x across cluster]
> (ctx_internal_ray pid=1149879) 2023-08-09 15:13:35,265 cisTarget    INFO     Unable to load annotation for homo_sapiens [repeated 9x across cluster]
> (ctx_internal_ray pid=1149878) 2023-08-09 15:13:36,091 cisTarget    INFO     Getting cistromes for Topic8 [repeated 9x across cluster]
> (ctx_internal_ray pid=1149867) 2023-08-09 15:13:36,553 cisTarget    INFO     Annotating motifs for Topic11
> (ctx_internal_ray pid=1149867) 2023-08-09 15:13:36,562 cisTarget    INFO     Unable to load annotation for homo_sapiens
> (ctx_internal_ray pid=1149867) 2023-08-09 15:13:37,070 cisTarget    INFO     Getting cistromes for Topic11
> 2023-08-09 15:14:21,253 cisTarget    INFO     Done!
> 2023-08-09 15:14:21,262 pycisTarget_wrapper INFO     Created folder : /mnt/c/Users/pc/Downloads/motifs/CTX_topics_otsu_All
> 2023-08-09 15:14:21,610 pycisTarget_wrapper INFO     Running cisTarget without promoters for topics_otsu
> ---------------------------------------------------------------------------
> AttributeError                            Traceback (most recent call last)
> Cell In[99], line 2
>       1 from scenicplus.wrappers.run_pycistarget import run_pycistarget
> ----> 2 run_pycistarget(
>       3     region_sets = region_sets,
>       4     species = 'homo_sapiens',
>       5     save_path = os.path.join(work_dir, 'motifs'),
>       6     ctx_db_path = rankings_db,
>       7     dem_db_path = scores_db,
>       8     path_to_motif_annotations = motif_annotation,
>       9     run_without_promoters = True,
>      10     n_cpu = 40,
>      11     # _temp_dir = os.path.join(tmp_dir, 'ray_spill'),
>      12     annotation_version = 'v10nr_clust',
>      13     )
> 
> File /mnt/c/Users/pc/Downloads/scenicplus/src/scenicplus/wrappers/run_pycistarget.py:224, in run_pycistarget(region_sets, species, save_path, custom_annot, save_partial, ctx_db_path, dem_db_path, run_without_promoters, biomart_host, promoter_space, ctx_auc_threshold, ctx_nes_threshold, ctx_rank_threshold, dem_log2fc_thr, dem_motif_hit_thr, dem_max_bg_regions, annotation, motif_similarity_fdr, path_to_motif_annotations, annotation_version, n_cpu, _temp_dir, exclude_motifs, exclude_collection, **kwargs)
>     222 regions_np = {key: regions_overlaps[key][regions_overlaps[key].NumberOverlaps == 0][['Chromosome', 'Start', 'End']] for key in regions.keys()}
>     223 db_regions = set(pd.concat([ctx_db.regions_to_db[x] for x in ctx_db.regions_to_db.keys()])['Query'])
> --> 224 ctx_db.regions_to_db = {x: target_to_query(regions_np[x], list(db_regions), fraction_overlap = 0.4) for x in regions_np.keys()}
>     225 menr['CTX_'+key+'_No_promoters'] = run_cistarget(ctx_db = ctx_db,
>     226                    region_sets = regions_np,
>     227                    specie = species,
>    (...)
>     236                    annotation_version = annotation_version,
>     237                    **kwargs)
>     238 out_folder = os.path.join(save_path,'CTX_'+key+'_No_promoters')
> 
> File /mnt/c/Users/pc/Downloads/scenicplus/src/scenicplus/wrappers/run_pycistarget.py:224, in <dictcomp>(.0)
>     222 regions_np = {key: regions_overlaps[key][regions_overlaps[key].NumberOverlaps == 0][['Chromosome', 'Start', 'End']] for key in regions.keys()}
>     223 db_regions = set(pd.concat([ctx_db.regions_to_db[x] for x in ctx_db.regions_to_db.keys()])['Query'])
> --> 224 ctx_db.regions_to_db = {x: target_to_query(regions_np[x], list(db_regions), fraction_overlap = 0.4) for x in regions_np.keys()}
>     225 menr['CTX_'+key+'_No_promoters'] = run_cistarget(ctx_db = ctx_db,
>     226                    region_sets = regions_np,
>     227                    specie = species,
>    (...)
>     236                    annotation_version = annotation_version,
>     237                    **kwargs)
>     238 out_folder = os.path.join(save_path,'CTX_'+key+'_No_promoters')
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/utils.py:283, in target_to_query(target, query, fraction_overlap)
>     280     query_pr=query
>     282 join_pr = target_pr.join(query_pr, report_overlap = True)
> --> 283 join_pr.Overlap_query =  join_pr.Overlap/(join_pr.End_b - join_pr.Start_b)
>     284 join_pr.Overlap_target =  join_pr.Overlap/(join_pr.End - join_pr.Start)
>     285 join_pr = join_pr[(join_pr.Overlap_query > fraction_overlap) | (join_pr.Overlap_target > fraction_overlap)]
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pyranges/pyranges_main.py:265, in PyRanges.__getattr__(self, name)
>     240 """Return column.
>     241
>     242 Parameters
>    (...)
>     260 Name: Start, dtype: int64
>     261 """
>     263 from pyranges.methods.attr import _getattr
> --> 265 return _getattr(self, name)
> 
> File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pyranges/methods/attr.py:65, in _getattr(self, name)
>      63     return pd.concat([df[name] for df in self.values()])
>      64 else:
> ---> 65     raise AttributeError("PyRanges object has no attribute", name)
> 
> AttributeError: ('PyRanges object has no attribute', 'Overlap')
SeppeDeWinter commented 11 months ago

Hi @pchiang5

I guess that for some topics all regions overlap with promoter regions. You can check by running:


  def get_species_annotation(species: str):
      """Build promoter windows from a BioMart gene annotation.

      Queries the BioMart dataset named `species` for transcription start
      sites (TSS), keeps protein-coding transcripts, adjusts the chromosome
      names, and turns every TSS into a window of `promoter_space` on
      either side.

      NOTE: `biomart_host`, `promoter_space` and `region_sets`, as well as the
      modules `pbm` and `pr` (presumably pybiomart and pyranges), are not
      defined in this function. They are read from the enclosing scope and
      must exist before the function is called.

      Parameters
      ----------
      species
          Name of the BioMart dataset to query, e.g. 'hsapiens_gene_ensembl'.

      Returns
      -------
      annot
          PyRanges with columns Chromosome, Start, End holding the promoter
          windows.
      annot_dem
          Copy of the annotation table taken before the windows were
          computed, i.e. Start still holds the TSS position.
      """
      dataset = pbm.Dataset(name=species,  host=biomart_host)
      annot = dataset.query(attributes=['chromosome_name', 'transcription_start_site', 'strand', 'external_gene_name', 'transcript_biotype'])
      annot.columns = ['Chromosome', 'Start', 'Strand', 'Gene', 'Transcript_type']
      annot['Chromosome'] = annot['Chromosome'].astype('str')
      # Drop rows whose chromosome name contains CHR, GL, JH, MT or KI
      # (presumably patches, unplaced scaffolds and the mitochondrial genome).
      filterf = annot['Chromosome'].str.contains('CHR|GL|JH|MT|KI')
      annot = annot[~filterf]
      # NOTE(review): the pattern suggests the intent is to prefix names with
      # 'chr', but Series.replace is called without regex=True, so it looks
      # like this matches whole values literally and has no effect. The
      # fallback a few lines below adds the prefix instead. Confirm.
      annot['Chromosome'] = annot['Chromosome'].replace(r'(\b\S)', r'chr\1')
      annot = annot[annot.Transcript_type == 'protein_coding']
      annot = annot.dropna(subset = ['Chromosome', 'Start'])
      # Check whether the user's regions use 'chr'-prefixed chromosome names.
      # Only the first region set of the first group is inspected.
      check = region_sets[list(region_sets.keys())[0]]
      if not any(['chr' in c for c in check[list(check.keys())[0]].df['Chromosome']]):
          annot.Chromosome = annot.Chromosome.str.replace('chr', '')
      # NOTE(review): this runs after the stripping above, so when 'chr' was
      # just removed it is added straight back. Confirm that is intended for
      # region sets without the 'chr' prefix.
      if not any(['chr' in x for x in annot.Chromosome]):
          annot.Chromosome = [f'chr{x}' for x in annot.Chromosome]
      # Keep a copy that still has the TSS in Start before it is overwritten.
      annot_dem=annot.copy()
      # Define promoter space: End is computed first because it needs the
      # original TSS, which the next line overwrites in Start.
      annot['End'] = annot['Start'].astype(int)+promoter_space
      annot['Start'] = annot['Start'].astype(int)-promoter_space
      annot = pr.PyRanges(annot[['Chromosome', 'Start', 'End']])
      return annot, annot_dem

# Promoter windows (annot) for the 'hsapiens_gene_ensembl' BioMart dataset.
annot, annot_dem = get_species_annotation('hsapiens_gene_ensembl')

# The key is 'topics_otsu' (with an "s"); 'topic_otsu' raises a KeyError.
topics = region_sets['topics_otsu']

# For every topic, count how many promoter windows each region overlaps.
regions_overlaps = {
    topic: topic_regions.count_overlaps(annot)
    for topic, topic_regions in topics.items()}

# Keep only the regions that overlap no promoter window. A topic that ends up
# empty here has all of its regions inside promoters.
regions_np = {
    topic: overlaps[overlaps.NumberOverlaps == 0][["Chromosome", "Start", "End"]]
    for topic, overlaps in regions_overlaps.items()}

regions_np

That being said, there should be a check in the code for this.

Best,

Seppe

pchiang5 commented 11 months ago

Hi @SeppeDeWinter

All the topics have at least one region that is not in a promoter (defined as ±500 bp). I tried removing Topic15, which contains only seven regions (six of them in promoters), and it worked.

However, I got another immediate error:


ValueError Traceback (most recent call last) Cell In[128], line 2 1 from scenicplus.wrappers.run_pycistarget import run_pycistarget ----> 2 run_pycistarget( 3 region_sets = region_sets, 4 species = 'homo_sapiens', 5 save_path = os.path.join(work_dir, 'motifs'), 6 ctx_db_path = rankings_db, 7 dem_db_path = scores_db, 8 path_to_motif_annotations = motif_annotation, 9 run_without_promoters = True, 10 n_cpu = 1, 11 _temp_dir = os.path.join(tmp_dir, 'ray_spill'), 12 annotation_version = 'v10nr_clust', 13 )

File /mnt/c/Users/pc/Downloads/scenicplus/src/scenicplus/wrappers/run_pycistarget.py:256, in run_pycistarget(region_sets, species, save_path, custom_annot, save_partial, ctx_db_path, dem_db_path, run_without_promoters, biomart_host, promoter_space, ctx_auc_threshold, ctx_nes_threshold, ctx_rank_threshold, dem_log2fc_thr, dem_motif_hit_thr, dem_max_bg_regions, annotation, motif_similarity_fdr, path_to_motif_annotations, annotation_version, n_cpu, _temp_dir, exclude_motifs, exclude_collection, **kwargs) 254 log.info('Running DEM for '+key) 255 regions = region_sets[key] --> 256 dem_db = DEMDatabase(dem_db_path, regions) 257 if exclude_motifs is not None: 258 out = pd.read_csv(exclude_motifs, header=None).iloc[:,0].tolist()

File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/motif_enrichment_dem.py:67, in DEMDatabase.__init__(self, fname, region_sets, name, fraction_overlap) 48 def __init__(self, 49 fname: str, 50 region_sets: Dict[str, pr.PyRanges] = None, 51 name: str = None, 52 fraction_overlap: float = 0.4): 53 """ 54 Initialize DEMDatabase 55 (...) 65 Minimal overlap between query and regions in the database for the mapping. 66 """ ---> 67 self.regions_to_db, self.db_scores = self.load_db(fname, 68 region_sets, 69 name, 70 fraction_overlap)

File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/pycistarget/motif_enrichment_dem.py:111, in DEMDatabase.load_db(self, fname, region_sets, name, fraction_overlap) 109 if name is None: 110 name = os.path.basename(fname) --> 111 db = FeatherRankingDatabase(fname, name=name) 112 db_regions = db.genes 113 if region_sets is not None:

File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/ctxcore/rnkdb.py:109, in FeatherRankingDatabase.__init__(self, fname, name) 106 assert os.path.isfile(fname), """Database "{fname}" doesn't exist.""" 108 self._fname = fname --> 109 self.ct_db = CisTargetDatabase.init_ct_db( 110 ct_db_filename=self._fname, engine="pyarrow" 111 )

File /home/pc/miniconda3/envs/scenicplus/lib/python3.8/site-packages/ctxcore/ctdb.py:170, in CisTargetDatabase.init_ct_db(ct_db_filename, engine) 167 feather_v1_or_v2 = is_feather_v1_or_v2(ct_db_filename) 169 if not feather_v1_or_v2: --> 170 raise ValueError( 171 f'"{ct_db_filename}" is not a cisTarget Feather database in Feather v1 or v2 format.' 172 ) 173 elif feather_v1_or_v2 == 1: 174 raise ValueError( 175 f'"{ct_db_filename}" is a cisTarget Feather database in Feather v1 format, which is not supported ' 176 f'anymore. Convert them with "convert_cistarget_databases_v1_to_v2.py" ' 177 "(https://github.com/aertslab/create_cisTarget_databases/) to Feather v2 format." 178 )

ValueError: "/mnt/c/Users/pc/Downloads/hg38_screen_v10_clust.regions_vs_motifs.scores.feather" is not a cisTarget Feather database in Feather v1 or v2 format.

The file was downloaded from https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/screen/mc_v10_clust/region_based/

Does it mean I have to perform a conversion on the feather file? Thanks

pchiang5 commented 11 months ago

The error was resolved after I re-downloaded the feather from the website.