Open nick-youngblut opened 1 month ago
I'm guessing that the issue comes from the code block:
// Pad the per-sample collection so that every supported feature type is represented:
// any type that has no entry yet gets a placeholder entry pointing at `empty_file`.
// Note: 'beam' is currently not implemented, the input samplesheet checking will not allow it.
['gex', 'vdj', 'ab', 'beam', 'crispr', 'cmo'].each { ftype ->
    def already_present = map_collection_clone.any { entry -> entry.feature_type == ftype }
    if (!already_present) {
        // The placeholder file is stored under a key named after the feature type itself
        map_collection_clone.add( [ id: sample_id, feature_type: ftype, (ftype): empty_file, options: [:] ] )
    }
}
The following modification seems to fix the issue:
// Split the merged per-sample meta maps back into [ meta, fastqs ] tuples, drop the
// EMPTY placeholders, and route each tuple to a branch named after its feature type.
ch_fastq
    .flatten()
    .map { meta ->
        // Work on a copy so the map emitted upstream is left untouched
        def meta_clone = meta.clone()
        // The FASTQ payload is stored under the key that matches the entry's feature type
        def data_dict = meta_clone.find { it.key == "${meta_clone.feature_type}" }
        // `def` keeps this variable local to the closure invocation; without it the
        // variable is not closure-scoped and may be shared between concurrent invocations
        def fastqs = data_dict?.value
        meta_clone.remove( data_dict?.key )
        [ meta_clone, fastqs ]
    }
    .filter { meta, fastq ->
        // Keep entries whose fastq is a list of files; otherwise exclude the 'EMPTY' placeholder.
        // NOTE(review): dropping the placeholders here appears to leave some branches without
        // any item, which may be why CELLRANGER_MULTI never runs with this variant — confirm.
        fastq instanceof List || !fastq.toString().endsWith('/EMPTY')
    }
    .branch {
        meta, fastq ->
            gex: meta.feature_type == "gex"
                return [ meta, fastq ]
            vdj: meta.feature_type == "vdj"
                return [ meta, fastq ]
            ab: meta.feature_type == "ab"
                // Antibody data can only be processed when a feature-barcode reference is supplied
                if (params.fb_reference){
                    return [ meta, fastq ]
                } else {
                    error ("Antibody reference was not specified. Please provide a reference file for feature barcoding (e.g. antibody measurements).\nPlease refer to https://www.10xgenomics.com/support/software/cell-ranger/latest/analysis/inputs/cr-feature-ref-csv for more details.")
                }
            beam: meta.feature_type == "beam"
                return [ meta, fastq ]
            crispr: meta.feature_type == "crispr"
                return [ meta, fastq ]
            cmo: meta.feature_type == "cmo"
                return [ meta, fastq ]
    }
    .set { ch_grouped_fastq }
I'm guessing that this issue was missed because the test data (`cellrangermulti_samplesheet.csv`) contains the `ab` feature type.
A problem with my updated code is that `CELLRANGER_MULTI` never runs. The run trace:
task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar
2 a4/e5b1d1 35330 NFCORE_SCRNASEQ:SCRNASEQ:CELLRANGER_MULTI_ALIGN:PARSE_CELLRANGERMULTI_SAMPLESHEET COMPLETED 0 2024-10-03 11:15:25.868 4.5s 0ms 41.1% 9.1 MB 14.9 MB 932.9 KB 633 B
1 b8/327626 35328 NFCORE_SCRNASEQ:SCRNASEQ:GTF_GENE_FILTER (genome.fa) COMPLETED 0 2024-10-03 11:15:25.807 14.5s 9s 82.0% 1.1 GB 1.1 GB 3.5 GB 927.5 MB
4 61/37c05e 35331 NFCORE_SCRNASEQ:SCRNASEQ:CELLRANGER_MULTI_ALIGN:CELLRANGER_MKGTF (genome_genes.gtf) COMPLETED 0 2024-10-03 11:15:40.396 59.9s 55.3s 99.0% 61.1 MB 247.3 MB 935.7 MB 927.5 MB
5 b1/0a59d6 35332 NFCORE_SCRNASEQ:SCRNASEQ:CELLRANGER_MULTI_ALIGN:CELLRANGER_MKREF (genome.fa) COMPLETED 0 2024-10-03 11:16:40.459 11m 40s 11m 34s 452.3% 17.2 GB 24.4 GB 39.6 GB 28.2 GB
3 b8/33167b 35329 NFCORE_SCRNASEQ:SCRNASEQ:FASTQC_CHECK:FASTQC (NPC_Astro_Diff_aBeta) COMPLETED 0 2024-10-03 11:15:25.844 2h 35m 35s 2h 35m 32s 140.1% 5.2 GB 63.7 GB 118.2 GB 4 MB
6 67/f16c82 35381 NFCORE_SCRNASEQ:SCRNASEQ:MULTIQC COMPLETED 0 2024-10-03 13:51:01.396 19.9s 13.2s 57.6% 646.2 MB 9.8 GB 91.2 MB 24.3 MB
I ended up using the following:
main:
ch_versions = Channel.empty()

//
// TODO: Include checkers for cellranger multi parameter combinations. For example, when VDJ data is given, require VDJ ref. If FFPE, require frna probe sets, etc.
//

// All data was merged into meta maps upstream, so there is one channel item per sample
// and every element of that item is the meta map of one data type.
// Split it back into [ meta, fastqs ] tuples, one branch per feature type, so that
// they can be passed as inputs to the module.
ch_fastq
    .flatten()
    .map { meta ->
        // Work on a copy so the map emitted upstream is left untouched
        def meta_clone = meta.clone()
        // The FASTQ payload is stored under the key that matches the entry's feature type
        def data_dict = meta_clone.find { it.key == "${meta_clone.feature_type}" }
        // `def` keeps this variable local to the closure invocation; without it the
        // variable is not closure-scoped and may be shared between concurrent invocations
        def fastqs = data_dict?.value
        meta_clone.remove( data_dict?.key )
        [ meta_clone, fastqs ]
    }
    .branch {
        meta, fastq ->
            gex: meta.feature_type == "gex"
                return [ meta, fastq ]
            vdj: meta.feature_type == "vdj"
                return [ meta, fastq ]
            ab: meta.feature_type == "ab"
                return [ meta, fastq ]
            beam: meta.feature_type == "beam"
                return [ meta, fastq ]
            crispr: meta.feature_type == "crispr"
                return [ meta, fastq ]
            cmo: meta.feature_type == "cmo"
                return [ meta, fastq ]
    }
    .set { ch_grouped_fastq }
...with the following nextflow command:
# Launch the pipeline from the local checkout (main.nf) with the cellrangermulti aligner,
# using the singularity profile and submitting processes to the slurm queue cpu_batch.
# The work directory is placed under /scratch/<group name>/<user name>/, resolved at
# launch time via `id -gn` and `whoami`.
nextflow run main.nf \
-ansi-log false \
-profile singularity \
-process.executor slurm \
-process.queue cpu_batch \
-work-dir /scratch/$(id -gn)/$(whoami)/nextflow-work/scrnaseq \
--aligner cellrangermulti \
--skip_cellrangermulti_vdjref \
--skip_emptydrops \
--gex_frna_probe_set Chromium_Human_Transcriptome_Probe_Set_v1.0.1_GRCh38-2020-A.csv \
--cellranger_multi_barcodes sample_barcodes.csv \
--cellranger_index refdata-gex-GRCh38-2024-A/ \
--input samples.csv \
--outdir scrnaseq_output
Description of the bug
sample.csv:
sample_barcodes.csv:
The error:
Running the pipeline with:
Shows:
...which then results in the `ab` error in the `branch` operation:

Where are the extra feature types (e.g., `vdj` and `ab`) coming from? I believe that I have my input CSV files set up as in `cellrangermulti_samplesheet.csv` and `cellranger_barcodes_samplesheet.csv`.
Command used and terminal output
Relevant files
No response
System information