ENCODE-DCC / chip-seq-pipeline2

ENCODE ChIP-seq pipeline
MIT License
241 stars 123 forks source link

Call-qc failed due to an error #212

Open andrewucla opened 3 years ago

andrewucla commented 3 years ago

Describe the bug

It seems the pipeline has an error on the qc step.

==== NAME=chip.qc_report, STATUS=RetryableFailure, PARENT=
SHARD_IDX=-1, RC=1, JOB_ID=55217
START=2021-02-16T10:19:34.569Z, END=2021-02-16T10:19:51.775Z
STDOUT=/gpfs/fs1/data/shenlab/lw/chip-seq-pipeline2/run_pipeline/OP/K36me3high/chip/65302236-00d0-4f5c-af80-47836f267413/call-qc_report/execution/stdout
STDERR=/gpfs/fs1/data/shenlab/lw/chip-seq-pipeline2/run_pipeline/OP/K36me3high/chip/65302236-00d0-4f5c-af80-47836f267413/call-qc_report/execution/stderr
STDERR_CONTENTS=
Traceback (most recent call last):
  File "/data/shenlab/lw/miniconda3/envs/encode-chip-seq-pipeline/bin/encode_task_qc_report.py", line 927, in <module>
    main()
  File "/data/shenlab/lw/miniconda3/envs/encode-chip-seq-pipeline/bin/encode_task_qc_report.py", line 890, in main
    make_cat_align_enrich(args, cat_root)
  File "/data/shenlab/lw/miniconda3/envs/encode-chip-seq-pipeline/bin/encode_task_qc_report.py", line 737, in make_cat_align_enrich
    cat_jsd.add_log(qc, key=str_rep(i))
  File "/gpfs/fs1/data/shenlab/lw/miniconda3/envs/encode-chip-seq-pipeline/bin/encode_lib_qc_category.py", line 159, in add_log
    parser=self._parser,
  File "/gpfs/fs1/data/shenlab/lw/miniconda3/envs/encode-chip-seq-pipeline/bin/encode_lib_qc_category.py", line 43, in __init__
    self.__parse()
  File "/gpfs/fs1/data/shenlab/lw/miniconda3/envs/encode-chip-seq-pipeline/bin/encode_lib_qc_category.py", line 90, in __parse
    self._dict = self._parser(self._log_file)
  File "/gpfs/fs1/data/shenlab/lw/miniconda3/envs/encode-chip-seq-pipeline/bin/encode_lib_log_parser.py", line 421, in parse_jsd_qc
    result['jsd'] = float(arr[6])
ValueError: could not convert string to float: 'NA'

OS/Platform

Caper configuration file

Paste contents of ~/.caper/default.conf.

backend=slurm

# define one of the following (or both) according to your
# cluster's SLURM configuration.
slurm-partition=
slurm-account=

# Hashing strategy for call-caching (3 choices)
# This parameter is for local (local/slurm/sge/pbs) backend only.
# This is important for call-caching,
# which means re-using outputs from previous/failed workflows.
# Cache will miss if different strategy is used.
# "file" method has been the default for all old versions of Caper<1.0.
# "path+modtime" is the new default for Caper>=1.0.
#   file: use md5sum hash (slow).
#   path: use path.
#   path+modtime: use path and modification time.
local-hash-strat=path+modtime

# Local directory for localized files and Cromwell's intermediate files
# If not defined, Caper will make .caper_tmp/ on local-out-dir or CWD.
# /tmp is not recommended here since Caper stores all localized data files
# on this directory (e.g. input FASTQs defined as URLs in input JSON).
local-loc-dir=

cromwell=/home/lw227/.caper/cromwell_jar/cromwell-52.jar
womtool=/home/lw227/.caper/womtool_jar/womtool-52.jar

Input JSON file

Paste contents of your input JSON file.

{
    "chip.title" : "high_27ac",
    "chip.description" : "high_27ac",

    "chip.pipeline_type" : "histone",
    "chip.peak_caller" : "macs2",
    "chip.align_only" : false,
    "chip.true_rep_only" : false,

    "chip.genome_tsv" : "/data/shenlab/lw/chip-seq-pipeline2/genome/hg19/hg19.tsv",
    "chip.paired_end" : true,
    "chip.ctl_paired_end" : true,

    "chip.fastqs_rep1_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib57-63/trimmed/Lib58_CKDL200150158-1a-AK4949_H7N2KBBXX_L6/OP27.R1.paired.fq.gz" ],
    "chip.fastqs_rep1_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib57-63/trimmed/Lib58_CKDL200150158-1a-AK4949_H7N2KBBXX_L6/OP27.R2.paired.fq.gz" ],
    "chip.fastqs_rep2_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib65_CKDL200150159-1a-AK4949_H7N2KBBXX_L7/OP17.R1.paired.fq.gz" ],
    "chip.fastqs_rep2_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib65_CKDL200150159-1a-AK4949_H7N2KBBXX_L7/OP17.R2.paired.fq.gz" ],
    "chip.fastqs_rep3_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib65_CKDL200150159-1a-AK4949_H7N2KBBXX_L7/OP34.R1.paired.fq.gz" ],
    "chip.fastqs_rep3_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib65_CKDL200150159-1a-AK4949_H7N2KBBXX_L7/OP34.R2.paired.fq.gz" ],
    "chip.fastqs_rep4_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib72_CKDL200150160-1a-AK4949_H7N2KBBXX_L8/OP18.R1.paired.fq.gz" ],
    "chip.fastqs_rep4_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib72_CKDL200150160-1a-AK4949_H7N2KBBXX_L8/OP18.R2.paired.fq.gz" ],
    "chip.fastqs_rep5_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib65_CKDL200150159-1a-AK4949_H7N2KBBXX_L7/OP33.R1.paired.fq.gz" ],
    "chip.fastqs_rep5_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib65_CKDL200150159-1a-AK4949_H7N2KBBXX_L7/OP33.R2.paired.fq.gz" ],
    "chip.fastqs_rep6_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib65_CKDL200150159-1a-AK4949_H7N2KBBXX_L7/OP12.R1.paired.fq.gz" ],
    "chip.fastqs_rep6_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib65_CKDL200150159-1a-AK4949_H7N2KBBXX_L7/OP12.R2.paired.fq.gz" ],
    "chip.fastqs_rep7_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib57-63/trimmed/Lib58_CKDL200150158-1a-AK4949_H7N2KBBXX_L6/OP9.R1.paired.fq.gz" ],
    "chip.fastqs_rep7_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib57-63/trimmed/Lib58_CKDL200150158-1a-AK4949_H7N2KBBXX_L6/OP9.R2.paired.fq.gz" ],
    "chip.fastqs_rep8_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib72_CKDL200150160-1a-AK4949_H7N2KBBXX_L8/OP30.R1.paired.fq.gz" ],
    "chip.fastqs_rep8_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib72_CKDL200150160-1a-AK4949_H7N2KBBXX_L8/OP30.R2.paired.fq.gz" ],
    "chip.fastqs_rep9_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib72_CKDL200150160-1a-AK4949_H7N2KBBXX_L8/OP3.R1.paired.fq.gz" ],
    "chip.fastqs_rep9_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib72_CKDL200150160-1a-AK4949_H7N2KBBXX_L8/OP3.R2.paired.fq.gz" ],

    "chip.ctl_fastqs_rep1_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib57-63/trimmed/Lib57_CKDL200150158-1a-AK2721_H7N2KBBXX_L6/OP27.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep1_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib57-63/trimmed/Lib57_CKDL200150158-1a-AK2721_H7N2KBBXX_L6/OP27.R2.paired.fq.gz" ],
    "chip.ctl_fastqs_rep2_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib64_CKDL200150159-1a-AK2721_H7N2KBBXX_L7/OP17.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep2_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib64_CKDL200150159-1a-AK2721_H7N2KBBXX_L7/OP17.R2.paired.fq.gz" ],
    "chip.ctl_fastqs_rep3_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib64_CKDL200150159-1a-AK2721_H7N2KBBXX_L7/OP34.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep3_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib64_CKDL200150159-1a-AK2721_H7N2KBBXX_L7/OP34.R2.paired.fq.gz" ],
    "chip.ctl_fastqs_rep4_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib71_CKDL200150160-1a-AK2721_H7N2KBBXX_L8/OP18.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep4_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib71_CKDL200150160-1a-AK2721_H7N2KBBXX_L8/OP18.R2.paired.fq.gz" ],
    "chip.ctl_fastqs_rep5_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib64_CKDL200150159-1a-AK2721_H7N2KBBXX_L7/OP33.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep5_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib64_CKDL200150159-1a-AK2721_H7N2KBBXX_L7/OP33.R2.paired.fq.gz" ],
    "chip.ctl_fastqs_rep6_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib64_CKDL200150159-1a-AK2721_H7N2KBBXX_L7/OP12.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep6_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib64-70/trimmed/Lib64_CKDL200150159-1a-AK2721_H7N2KBBXX_L7/OP12.R2.paired.fq.gz" ],
    "chip.ctl_fastqs_rep7_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib57-63/trimmed/Lib57_CKDL200150158-1a-AK2721_H7N2KBBXX_L6/OP9.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep7_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib57-63/trimmed/Lib57_CKDL200150158-1a-AK2721_H7N2KBBXX_L6/OP9.R2.paired.fq.gz" ],
    "chip.ctl_fastqs_rep8_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib71_CKDL200150160-1a-AK2721_H7N2KBBXX_L8/OP30.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep8_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib71_CKDL200150160-1a-AK2721_H7N2KBBXX_L8/OP30.R2.paired.fq.gz" ],
    "chip.ctl_fastqs_rep9_R1" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib71_CKDL200150160-1a-AK2721_H7N2KBBXX_L8/OP3.R1.paired.fq.gz" ],
    "chip.ctl_fastqs_rep9_R2" : [ "/data/shenlab/lw/OP/for-Ergang/mint-chip/lib71-77/trimmed/Lib71_CKDL200150160-1a-AK2721_H7N2KBBXX_L8/OP3.R2.paired.fq.gz" ],

    "chip.crop_length" : 0,

    "chip.mapq_thresh" : 30,
    "chip.dup_marker" : "picard",
    "chip.no_dup_removal" : false,

    "chip.subsample_reads" : 0,
    "chip.ctl_subsample_reads" : 0,
    "chip.xcor_subsample_reads" : 15000000,

    "chip.xcor_trim_bp" : 50,
    "chip.use_filt_pe_ta_for_xcor" : false,

    "chip.always_use_pooled_ctl" : true,
    "chip.ctl_depth_ratio" : 1.2,

    "chip.peak_caller" : null,
    "chip.cap_num_peak" : 500000,
    "chip.pval_thresh" : 0.01,
    "chip.fdr_thresh" : 0.01,
    "chip.idr_thresh" : 0.05,

    "chip.enable_jsd" : true,
    "chip.enable_gc_bias" : true,
    "chip.enable_count_signal_track" : false,

    "chip.filter_chrs" : [],

    "chip.align_cpu" : 6,
    "chip.align_bowtie2_mem_factor" : 0.15,
    "chip.align_bwa_mem_factor" : 0.15,
    "chip.align_time_hr" : 48,
    "chip.align_bowtie2_disk_factor" : 8.0,
    "chip.align_bwa_disk_factor" : 8.0,

    "chip.filter_cpu" : 4,
    "chip.filter_mem_factor" : 0.4,
    "chip.filter_time_hr" : 24,
    "chip.filter_disk_factor" : 6.0,

    "chip.bam2ta_cpu" : 2,
    "chip.bam2ta_mem_factor" : 0.35,
    "chip.bam2ta_time_hr" : 6,
    "chip.bam2ta_disk_factor" : 4.0,

    "chip.spr_mem_factor" : 4.5,
    "chip.spr_disk_factor" : 6.0,

    "chip.jsd_cpu" : 4,
    "chip.jsd_mem_factor" : 0.1,
    "chip.jsd_time_hr" : 6,
    "chip.jsd_disk_factor" : 2.0,

    "chip.xcor_cpu" : 2,
    "chip.xcor_mem_factor" : 1.0,
    "chip.xcor_time_hr" : 24,
    "chip.xcor_disk_factor" : 4.5,

    "chip.subsample_ctl_mem_factor" : 7.0,
    "chip.subsample_ctl_disk_factor" : 7.5,

    "chip.call_peak_cpu" : 6,
    "chip.call_peak_spp_mem_factor" : 5.0,
    "chip.call_peak_macs2_mem_factor" : 2.5,
    "chip.call_peak_time_hr" : 72,
    "chip.call_peak_spp_disk_factor" : 5.0,
    "chip.call_peak_macs2_disk_factor" : 15.0,

    "chip.macs2_signal_track_mem_factor" : 6.0,
    "chip.macs2_signal_track_time_hr" : 24,
    "chip.macs2_signal_track_disk_factor" : 40.0
}
andrewucla commented 3 years ago

I did find that one of my rep.jsd.qc files has NA in it.

0.0037128227321947906 0.4242645622054558 0.9865692356303221 0.12557735764150363 0.9866592273510837 0.5149746205118282 NA 0.13071263669266958 NA NA NA

What should I do in order to get the QC report going?

leepc12 commented 3 years ago

Let's disable jsd task for now. Add the following to your input JSON.

{
    "chip.enable_jsd": false
}

Re-submit the workflow with the same WDL and the modified input JSON. The pipeline will then start from qc_report (the final task of the pipeline) by re-using the outputs of all upstream analyses.