aertslab / pycisTopic

pycisTopic is a Python module to simultaneously identify cell states and cis-regulatory topics from single cell epigenomics data.
Other
58 stars 12 forks source link

export_pseudobulk error #180

Closed sugiYag closed 1 month ago

sugiYag commented 1 month ago

I am following the tutorial, but an error occurs when I run the code attached below.

What kind of data should the file in the tmp directory contain, and shouldn't it be created automatically?

from pycisTopic.pseudobulk_peak_calling import export_pseudobulk
os.makedirs(os.path.join(out_dir, "consensus_peak_calling"), exist_ok = True)
os.makedirs(os.path.join(out_dir, "consensus_peak_calling/pseudobulk_bed_files"), exist_ok = True)
os.makedirs(os.path.join(out_dir, "consensus_peak_calling/pseudobulk_bw_files"), exist_ok = True)

bw_paths, bed_paths = export_pseudobulk(
    input_data = cell_data,
    variable = "VSN_cell_type",
    sample_id_col = "VSN_sample_id",
    chromsizes = chromsizes,
    bed_path = os.path.join(out_dir, "consensus_peak_calling/pseudobulk_bed_files"),
    bigwig_path = os.path.join(out_dir, "consensus_peak_calling/pseudobulk_bw_files"),
    path_to_fragments = fragments_dict,
    n_cpu = 10,
    normalize_bigwig = True,
    temp_dir = "/tmp",
    split_pattern = "-"
)

ValueError Traceback (most recent call last) Cell In[15], line 7 3 os.makedirs(os.path.join(out_dir, "consensus_peak_calling/pseudobulk_bed_files"), exist_ok = True) 4 os.makedirs(os.path.join(out_dir, "consensus_peak_calling/pseudobulk_bw_files"), exist_ok = True) ----> 7 bw_paths, bed_paths = export_pseudobulk( 8 input_data = cell_data, 9 variable = "Status", 10 sample_id_col = "sample", 11 chromsizes = chromsizes, 12 bed_path = os.path.join(out_dir, "consensus_peak_calling/pseudobulk_bed_files"), 13 bigwig_path = os.path.join(out_dir, "consensus_peak_calling/pseudobulk_bw_files"), 14 path_to_fragments = fragments_dict, 15 n_cpu = 10, 16 normalize_bigwig = True, 17 temp_dir = "/tmp", 18 split_pattern = "-" 19 )

File ~/anaconda3/envs/re_scenic/lib/python3.11/site-packages/pycisTopic/pseudobulk_peak_calling.py:162, in export_pseudobulk(input_data, variable, chromsizes, bed_path, bigwig_path, path_to_fragments, sample_id_col, n_cpu, normalize_bigwig, split_pattern, temp_dir) 159 # For each sample, get fragments for each cell type 161 log.info("Splitting fragments by cell type.") --> 162 split_fragment_files_by_cell_type( 163 sample_to_fragment_file = path_to_fragments, 164 path_to_temp_folder = temp_dir, 165 path_to_output_folder = bed_path, 166 sample_to_cell_type_to_cell_barcodes = sample_to_cell_type_to_barcodes, 167 chromsizes = chromsizes_dict, 168 n_cpu = n_cpu, 169 verbose = False, 170 clear_temp_folder = True 171 ) 173 bed_paths = {} 174 for cell_type in cell_data[variable].unique():

File ~/anaconda3/envs/re_scenic/lib/python3.11/site-packages/scatac_fragment_tools/library/split/split_fragments_by_cell_type.py:92, in split_fragment_files_by_cell_type(sample_to_fragment_file, path_to_temp_folder, path_to_output_folder, sample_to_cell_type_to_cell_barcodes, chromsizes, n_cpu, verbose, clear_temp_folder) 90 path_to_fragment_file = os.path.join(path_to_temp_folder, sample, f"{cell_type_sanitized}.fragments.tsv.gz") 91 if not os.path.exists(path_to_fragment_file): ---> 92 raise ValueError(f"Fragment file {path_to_fragment_file} does not exist.") 93 if cell_type_sanitized not in cell_type_to_fragment_files: 94 cell_type_to_fragment_files[cell_type_sanitized] = []

ValueError: Fragment file /tmp/HDv/Healthy.fragments.tsv.gz does not exist.