Closed cimendes closed 9 months ago
I ran the Cromwell tests locally on my dev VM (8 CPUs, 32 GB RAM, ample storage available), using an identical conda/mamba environment.
These commands were run inside the public_health_bioinformatics
root directory, so all WDLs and other files are available:
fasta_abricate_hits.tsv
# capturing flu type (A or B based on M1 hit) and subtype (e.g. H1 and N1 based on HA/NA hits)

# Print the subtype column ($15) of every abricate hit whose gene column ($6)
# exactly matches the requested gene, one hit per line.
#   $1 - gene name to match (M1, HA or NA)
#   $2 - abricate hits TSV to read (defaults to fasta_abricate_hits.tsv)
abricate_subtype_for_gene() {
  awk -F '\t' -v gene="$1" '$6 == gene { print $15 }' "${2:-fasta_abricate_hits.tsv}"
}

# Record the nextclade dataset name and reference for one segment; the dataset
# tag file receives an empty line.
#   $1 - segment suffix of the output files (HA or NA)
#   $2 - nextclade dataset name
#   $3 - nextclade dataset reference
write_nextclade_dataset() {
  echo "$2" > "NEXTCLADE_NAME_$1"
  echo "$3" > "NEXTCLADE_REF_$1"
  echo "" > "NEXTCLADE_DS_TAG_$1"
}

# Derive the flu type/subtype from the abricate hits and pick nextclade datasets.
#   $1 - abricate hits TSV (defaults to fasta_abricate_hits.tsv)
# Writes:  FLU_TYPE, FLU_SUBTYPE, NEXTCLADE_{NAME,REF,DS_TAG}_{HA,NA}, RUN_NEXTCLADE
# Globals: sets HA_hit, NA_hit, flu_subtype and run_nextclade; they are left
#          global on purpose so later commands in the same script can read them.
capture_flu_subtype() {
  local hits_tsv="${1:-fasta_abricate_hits.tsv}"

  ## awk for gene column ($6) to grab subtype ($15)
  abricate_subtype_for_gene M1 "$hits_tsv" > FLU_TYPE
  HA_hit=$(abricate_subtype_for_gene HA "$hits_tsv")
  NA_hit=$(abricate_subtype_for_gene NA "$hits_tsv")

  # A missing hit is an empty string, so plain concatenation already yields
  # HA+NA, HA alone, NA alone, or nothing; no per-case branching is needed.
  flu_subtype="${HA_hit}${NA_hit}"
  echo "$flu_subtype" > FLU_SUBTYPE

  # set nextclade variables based on subtype
  run_nextclade=true
  # make sure every output file exists even when no dataset is selected
  touch NEXTCLADE_REF_HA NEXTCLADE_REF_NA NEXTCLADE_NAME_HA NEXTCLADE_NAME_NA NEXTCLADE_DS_TAG_HA NEXTCLADE_DS_TAG_NA
  case "$flu_subtype" in
    H1N1)
      write_nextclade_dataset HA "flu_h1n1pdm_ha" "MW626062"
      write_nextclade_dataset NA "flu_h1n1pdm_na" "MW626056"
      ;;
    H3N2)
      write_nextclade_dataset HA "flu_h3n2_ha" "CY163680"
      write_nextclade_dataset NA "flu_h3n2_na" "EPI1857215"
      ;;
    Victoria)
      write_nextclade_dataset HA "flu_vic_ha" "KX058884"
      write_nextclade_dataset NA "flu_vic_na" "CY073894"
      ;;
    Yamagata)
      write_nextclade_dataset HA "flu_yam_ha" "JN993010"
      # this makes no biological sense, but avoids errors with nextclade
      write_nextclade_dataset NA "flu_vic_na" "CY073894"
      ;;
    *)
      # any other (or empty) subtype has no dataset here, so nextclade is skipped
      run_nextclade=false
      ;;
  esac
  echo "${run_nextclade}" > RUN_NEXTCLADE
}

capture_flu_subtype
[2024-01-10 13:55:22,41] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.version_capture:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-version_capture/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-version_capture/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-version_capture:/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-version_capture:delegated \
us-docker.pkg.dev/general-theiagen/ubuntu/ubuntu@sha256:9a35cef02b57290a7c919824b632897fbe6db3aa18c930110b04d81fcad458ff /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-version_capture/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-version_capture/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-version_capture/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 13:55:22,41] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.pangolin4:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-pangolin4/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-pangolin4/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-pangolin4:/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-pangolin4:delegated \
us-docker.pkg.dev/general-theiagen/staphb/pangolin@sha256:54b03e235ee04f007417cb934d1a263ce73bbd16aa82134efa680bd5d856c38d /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-pangolin4/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-pangolin4/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-pangolin4/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 13:55:22,42] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.abricate_flu:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-abricate_flu/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-abricate_flu/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-abricate_flu:/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-abricate_flu:delegated \
us-docker.pkg.dev/general-theiagen/staphb/abricate@sha256:8a5a7ef82ed6d1f8bddd97197e93a55efc467e3ba978830126c97919ca60855d /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-abricate_flu/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-abricate_flu/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-abricate_flu/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 13:55:25,98] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.pangolin4:NA:1]: job id: 42037
[2024-01-10 13:55:25,98] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.abricate_flu:NA:1]: job id: 42038
[2024-01-10 13:55:25,98] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.version_capture:NA:1]: job id: 42035
[2024-01-10 13:55:25,98] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.pangolin4:NA:1]: Status change from - to WaitingForReturnCode
[2024-01-10 13:55:25,98] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.abricate_flu:NA:1]: Status change from - to Done
[2024-01-10 13:55:25,99] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.version_capture:NA:1]: Status change from - to Done
[2024-01-10 13:55:35,87] [info] b1079133-79da-47ea-87f4-4d12c22a7fef-SubWorkflowActor-SubWorkflow-organism_parameters:-1:1 [b1079133]: Workflow organism_parameters complete. Final Outputs:
{
"organism_parameters.vadr_maxlen": 30000,
"organism_parameters.primer_bed": "gs://theiagen-public-files/terra/theiacov-files/empty.bed",
"organism_parameters.nextclade_dataset_name": "sars-cov-2",
"organism_parameters.kraken_target_organism": "",
"organism_parameters.genome_len": 29903,
"organism_parameters.vadr_opts": "--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta",
"organism_parameters.nextclade_dataset_tag": "2023-08-17T12:00:00Z",
"organism_parameters.reference": "tests/inputs/completely-empty-for-test.txt",
"organism_parameters.nextclade_dataset_reference": "MN908947",
"organism_parameters.reference_gff": "gs://theiagen-public-files/terra/theiacov-files/empty.gff3"
}
[2024-01-10 13:55:37,86] [info] WorkflowExecutionActor-6d31935b-042f-4566-b5b7-a155b681d22c [6d31935b]: Starting theiacov_fasta.consensus_qc
[2024-01-10 13:55:39,90] [info] WorkflowExecutionActor-6d31935b-042f-4566-b5b7-a155b681d22c [6d31935b]: Starting theiacov_fasta.nextclade
[2024-01-10 13:55:41,11] [info] Assigned new job execution tokens to the following groups: 6d31935b: 2
[2024-01-10 13:55:41,33] [warn] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.nextclade:NA:1]: Unrecognized runtime attribute keys: disk, dx_instance_type, disks, cpu, memory
[2024-01-10 13:55:41,34] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.nextclade:NA:1]: NEXTCLADE_VERSION="$(nextclade --version)"
echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION
nextclade dataset get --name="sars-cov-2" --reference="MN908947" --tag="2023-08-17T12:00:00Z" -o nextclade_dataset_dir --verbose
set -e
nextclade run \
--input-dataset=nextclade_dataset_dir/ \
\
\
\
\
\
\
--output-json "clearlabs.fasta.gz".nextclade.json \
--output-tsv "clearlabs.fasta.gz".nextclade.tsv \
--output-tree "clearlabs.fasta.gz".nextclade.auspice.json \
--output-all=. \
"/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade/inputs/1305340193/clearlabs.fasta.gz"
[2024-01-10 13:55:41,35] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.nextclade:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade:/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade:delegated \
us-docker.pkg.dev/general-theiagen/nextstrain/nextclade@sha256:f4440021c7f854433bd56433024f38199de27e0ef657e11d5fb28aad4f265a48 /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 13:55:41,38] [warn] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.consensus_qc:NA:1]: Unrecognized runtime attribute keys: preemptible, disk, disks, cpu, memory
[2024-01-10 13:55:41,39] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.consensus_qc:NA:1]: if [ -s "/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/inputs/-273586876/completely-empty-for-test.txt" ] ; then
GENOME_LEN=$(grep -v ">" /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/inputs/-273586876/completely-empty-for-test.txt | tr --delete '\n' | wc -c)
elif [ 29903 ] ; then
GENOME_LEN=29903
else
# set SC2 default
GENOME_LEN=29903
fi
# capture date and version
date | tee DATE
num_N=$( grep -v ">" /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/inputs/1305340193/clearlabs.fasta.gz | grep -o 'N' | wc -l )
if [ -z "$num_N" ] ; then num_N="0" ; fi
echo $num_N | tee NUM_N
num_ACTG=$( grep -v ">" /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/inputs/1305340193/clearlabs.fasta.gz | grep -o -E "C|A|T|G" | wc -l )
if [ -z "$num_ACTG" ] ; then num_ACTG="0" ; fi
echo $num_ACTG | tee NUM_ACTG
# calculate percent coverage (Wu Han-1 genome length: 29903bp)
python3 -c "print ( round( ($num_ACTG / $GENOME_LEN ) * 100, 2 ) )" | tee PERCENT_REF_COVERAGE
num_degenerate=$( grep -v ">" /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/inputs/1305340193/clearlabs.fasta.gz | grep -o -E "B|D|E|F|H|I|J|K|L|M|O|P|Q|R|S|U|V|W|X|Y|Z" | wc -l )
if [ -z "$num_degenerate" ] ; then num_degenerate="0" ; fi
echo $num_degenerate | tee NUM_DEGENERATE
num_total=$( grep -v ">" /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/inputs/1305340193/clearlabs.fasta.gz | grep -o -E '[A-Z]' | wc -l )
if [ -z "$num_total" ] ; then num_total="0" ; fi
echo $num_total | tee NUM_TOTAL
[2024-01-10 13:55:41,43] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.consensus_qc:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc:/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc:delegated \
us-docker.pkg.dev/general-theiagen/theiagen/utility@sha256:b40130a48b94d64db4520ad8580c72cbcb8ea16bba486bc315096c2c3ec8cace /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-consensus_qc/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 13:55:45,96] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.nextclade:NA:1]: job id: 42765
[2024-01-10 13:55:45,96] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.consensus_qc:NA:1]: job id: 42778
[2024-01-10 13:55:45,96] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.nextclade:NA:1]: Status change from - to Done
[2024-01-10 13:55:45,96] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.consensus_qc:NA:1]: Status change from - to Done
[2024-01-10 13:55:47,04] [info] WorkflowExecutionActor-6d31935b-042f-4566-b5b7-a155b681d22c [6d31935b]: Starting theiacov_fasta.vadr
[2024-01-10 13:55:49,50] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.pangolin4:NA:1]: Status change from WaitingForReturnCode to Done
[2024-01-10 13:55:51,11] [info] Assigned new job execution tokens to the following groups: 6d31935b: 1
[2024-01-10 13:55:51,12] [info] WorkflowExecutionActor-6d31935b-042f-4566-b5b7-a155b681d22c [6d31935b]: Starting theiacov_fasta.nextclade_output_parser
[2024-01-10 13:55:51,45] [warn] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.vadr:NA:1]: Unrecognized runtime attribute keys: dx_instance_type, cpu, memory
[2024-01-10 13:55:51,46] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.vadr:NA:1]: set -e
if [ 0 -gt 10000 ]; then
# remove terminal ambiguous nucleotides
/opt/vadr/vadr/miniscripts/fasta-trim-terminal-ambigs.pl \
"/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-vadr/inputs/1305340193/clearlabs.fasta.gz" \
--minlen 50 \
--maxlen 30000 \
> "clearlabs.fasta.gz_trimmed.fasta"
# run VADR
# --split and --cpu must be used in conjuction
v-annotate.pl \
--split --cpu 2 \
--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta \
"clearlabs.fasta.gz_trimmed.fasta" \
"clearlabs.fasta.gz"
# package everything for output
tar -C "clearlabs.fasta.gz" -czvf "clearlabs.fasta.gz.vadr.tar.gz" .
# package up FASTA files into zip file for output. Note: this will work whether the --out_allfasta flag is included or not (there are just more when the option is used)
mkdir -v vadr_fasta_files
cp -v clearlabs.fasta.gz/*.fa vadr_fasta_files
zip clearlabs.fasta.gz_vadr-fasta-files.zip vadr_fasta_files/*.fa
# prep alerts into a tsv file for parsing
cut -f 5 "clearlabs.fasta.gz/clearlabs.fasta.gz.vadr.alt.list" | tail -n +2 > "clearlabs.fasta.gz.vadr.alerts.tsv"
cat "clearlabs.fasta.gz.vadr.alerts.tsv" | wc -l > NUM_ALERTS
else
echo "VADR skipped due to poor assembly; assembly length (unambiguous) = 0" > NUM_ALERTS
fi
[2024-01-10 13:55:51,47] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.vadr:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-vadr/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-vadr/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-vadr:/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-vadr:delegated \
us-docker.pkg.dev/general-theiagen/staphb/vadr@sha256:f15a73a6f6e0802c3a9c05e7c602740e7029a2e1ccc14aa6935b459be87c98d7 /cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-vadr/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-vadr/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-vadr/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 13:55:55,94] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.vadr:NA:1]: job id: 43234
[2024-01-10 13:55:55,95] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.vadr:NA:1]: Status change from - to Done
[2024-01-10 13:56:01,11] [info] Assigned new job execution tokens to the following groups: 6d31935b: 1
[2024-01-10 13:56:01,45] [warn] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.nextclade_output_parser:NA:1]: Unrecognized runtime attribute keys: disk, dx_instance_type, disks, cpu, memory
[2024-01-10 13:56:01,45] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade_output_parser/inputs/-617491802/clearlabs.fasta.gz.nextclade.tsv -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade/execution/clearlabs.fasta.gz.nextclade.tsv: Operation not permitted
[2024-01-10 13:56:01,46] [info] BackgroundConfigAsyncJobExecutionActor [6d31935btheiacov_fasta.nextclade_output_parser:NA:1]: # Set WDL input variable to input.tsv file
cat "/cromwell-executions/theiacov_fasta/6d31935b-042f-4566-b5b7-a155b681d22c/call-nextclade_output_parser/inputs/-617491802/clearlabs.fasta.gz.nextclade.tsv" > input.tsv
touch TAMIFLU_AASUBS
# Parse outputs using python3
python3 <
✅ theiacov_fasta via cromwell. exit code was 0
/home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus/execution/clearlabs.primertrimmed.rg.sorted.bam: Operation not permitted
[2024-01-10 14:03:01,85] [warn] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage:NA:1]: Unrecognized runtime attribute keys: preemptible, disk, disks, cpu, memory
[2024-01-10 14:03:01,86] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus/execution/clearlabs.primertrimmed.rg.sorted.bam: Operation not permitted
[2024-01-10 14:03:01,86] [warn] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.consensus_qc:NA:1]: Unrecognized runtime attribute keys: preemptible, disk, disks, cpu, memory
[2024-01-10 14:03:01,86] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/inputs/-1423075165/clearlabs.trimmed.rg.sorted.bam -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus/execution/clearlabs.trimmed.rg.sorted.bam: Operation not permitted
[2024-01-10 14:03:01,86] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/inputs/-1423075165/clearlabs.medaka.consensus.fasta -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus/execution/clearlabs.medaka.consensus.fasta: Operation not permitted
[2024-01-10 14:03:01,87] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.consensus_qc:NA:1]: if [ -s "" ] ; then
GENOME_LEN=$(grep -v ">" | tr --delete '\n' | wc -c)
elif [ ] ; then
GENOME_LEN=
else
# set SC2 default
GENOME_LEN=29903
fi
# capture date and version
date | tee DATE
num_N=$( grep -v ">" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/inputs/-1423075165/clearlabs.medaka.consensus.fasta | grep -o 'N' | wc -l )
if [ -z "$num_N" ] ; then num_N="0" ; fi
echo $num_N | tee NUM_N
num_ACTG=$( grep -v ">" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/inputs/-1423075165/clearlabs.medaka.consensus.fasta | grep -o -E "C|A|T|G" | wc -l )
if [ -z "$num_ACTG" ] ; then num_ACTG="0" ; fi
echo $num_ACTG | tee NUM_ACTG
# calculate percent coverage (Wu Han-1 genome length: 29903bp)
python3 -c "print ( round( ($num_ACTG / $GENOME_LEN ) * 100, 2 ) )" | tee PERCENT_REF_COVERAGE
num_degenerate=$( grep -v ">" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/inputs/-1423075165/clearlabs.medaka.consensus.fasta | grep -o -E "B|D|E|F|H|I|J|K|L|M|O|P|Q|R|S|U|V|W|X|Y|Z" | wc -l )
if [ -z "$num_degenerate" ] ; then num_degenerate="0" ; fi
echo $num_degenerate | tee NUM_DEGENERATE
num_total=$( grep -v ">" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/inputs/-1423075165/clearlabs.medaka.consensus.fasta | grep -o -E '[A-Z]' | wc -l )
if [ -z "$num_total" ] ; then num_total="0" ; fi
echo $num_total | tee NUM_TOTAL
[2024-01-10 14:03:01,90] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage_primtrim:NA:1]: date | tee DATE
samtools --version | head -n1 | tee VERSION
samtools stats /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam > clearlabs.stats.txt
samtools coverage /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam -m -o clearlabs.cov.hist
samtools coverage /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam -o clearlabs.cov.txt
samtools flagstat /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam > clearlabs.flagstat.txt
coverage=$(cut -f 6 clearlabs.cov.txt | tail -n 1)
depth=$(cut -f 7 clearlabs.cov.txt | tail -n 1)
meanbaseq=$(cut -f 8 clearlabs.cov.txt | tail -n 1)
meanmapq=$(cut -f 9 clearlabs.cov.txt | tail -n 1)
if [ -z "$coverage" ] ; then coverage="0" ; fi
if [ -z "$depth" ] ; then depth="0" ; fi
if [ -z "$meanbaseq" ] ; then meanbaseq="0" ; fi
if [ -z "$meanmapq" ] ; then meanmapq="0" ; fi
echo $coverage | tee COVERAGE
echo $depth | tee DEPTH
echo $meanbaseq | tee MEANBASEQ
echo $meanmapq | tee MEANMAPQ
[2024-01-10 14:03:01,90] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage:NA:1]: date | tee DATE
samtools --version | head -n1 | tee VERSION
samtools stats /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/inputs/-1423075165/clearlabs.trimmed.rg.sorted.bam > clearlabs.stats.txt
samtools coverage /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/inputs/-1423075165/clearlabs.trimmed.rg.sorted.bam -m -o clearlabs.cov.hist
samtools coverage /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/inputs/-1423075165/clearlabs.trimmed.rg.sorted.bam -o clearlabs.cov.txt
samtools flagstat /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/inputs/-1423075165/clearlabs.trimmed.rg.sorted.bam > clearlabs.flagstat.txt
coverage=$(cut -f 6 clearlabs.cov.txt | tail -n 1)
depth=$(cut -f 7 clearlabs.cov.txt | tail -n 1)
meanbaseq=$(cut -f 8 clearlabs.cov.txt | tail -n 1)
meanmapq=$(cut -f 9 clearlabs.cov.txt | tail -n 1)
if [ -z "$coverage" ] ; then coverage="0" ; fi
if [ -z "$depth" ] ; then depth="0" ; fi
if [ -z "$meanbaseq" ] ; then meanbaseq="0" ; fi
if [ -z "$meanmapq" ] ; then meanmapq="0" ; fi
echo $coverage | tee COVERAGE
echo $depth | tee DEPTH
echo $meanbaseq | tee MEANBASEQ
echo $meanmapq | tee MEANMAPQ
[2024-01-10 14:03:01,90] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.sc2_gene_coverage:NA:1]: samtools index /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam
chr=$(samtools idxstats /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | cut -f 1 | head -1)
samtools coverage -r "${chr}:21563-25384" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam >> clearlabs.cov.txt
s_gene_depth=$(cut -f 7 clearlabs.cov.txt | tail -n 1)
# samtools outputs 3 columns; column 3 is the depth of coverage per nucleotide position, piped to awk to count the positions
# above min_depth, then wc -l counts them all
orf1ab=$(samtools depth -J -r "${chr}:266-21555" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
sgene=$(samtools depth -J -r "${chr}:21563-25384" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
orf3a=$(samtools depth -J -r "${chr}:25393-26220" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
egene=$(samtools depth -J -r "${chr}:26245-26472" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
mgene=$(samtools depth -J -r "${chr}:26523-27191" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
orf6=$(samtools depth -J -r "${chr}:27202-27387" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
orf7a=$(samtools depth -J -r "${chr}:27394-27759" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
orf7b=$(samtools depth -J -r "${chr}:27756-27887" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
orf8=$(samtools depth -J -r "${chr}:27894-28259" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
ngene=$(samtools depth -J -r "${chr}:28274-29533" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
orf10=$(samtools depth -J -r "${chr}:29558-29674" /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/inputs/-1423075165/clearlabs.primertrimmed.rg.sorted.bam | awk -F "\t" '{if ($3 > 20) print;}' | wc -l )
orf1ab_pc=$(python3 -c "print ( round( ($orf1ab / 21290 ) * 100, 2 ) )")
sgene_pc=$(python3 -c "print ( round( ($sgene / 3822 ) * 100, 2 ) )")
orf3a_pc=$(python3 -c "print ( round( ($orf3a / 828 ) * 100, 2 ) )")
egene_pc=$(python3 -c "print ( round( ($egene / 228 ) * 100, 2 ) )")
mgene_pc=$(python3 -c "print ( round( ($mgene / 669 ) * 100, 2 ) )")
orf6_pc=$(python3 -c "print ( round( ($orf6 / 186 ) * 100, 2 ) )")
orf7a_pc=$(python3 -c "print ( round( ($orf7a / 366 ) * 100, 2 ) )")
orf7b_pc=$(python3 -c "print ( round( ($orf7b / 132 ) * 100, 2 ) )")
orf8_pc=$(python3 -c "print ( round( ($orf8 / 366 ) * 100, 2 ) )")
ngene_pc=$(python3 -c "print ( round( ($ngene / 1260 ) * 100, 2 ) )")
orf10_pc=$(python3 -c "print ( round( ($orf10 / 117 ) * 100, 2 ) )")
echo -e "#NOTE: THE VALUES BELOW ASSUME WUHAN-1 REFERENCE GENOME" > clearlabs.percent_gene_coverage.tsv
echo -e "Gene\tPercent_Coverage" >> clearlabs.percent_gene_coverage.tsv
echo -e "ORF1ab\t" $orf1ab_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "S_gene\t" $sgene_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "ORF3a\t" $orf3a_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "E_gene\t" $egene_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "M_gene\t" $mgene_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "ORF6\t" $orf6_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "ORF7a\t" $orf7a_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "ORF7b\t" $orf7b_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "ORF8\t" $orf8_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "N_gene\t" $ngene_pc >> clearlabs.percent_gene_coverage.tsv
echo -e "ORF10\t" $orf10_pc >> clearlabs.percent_gene_coverage.tsv
if [ -z "s_gene_depth" ] ; then s_gene_depth="0"; fi
echo $s_gene_depth | tee S_GENE_DEPTH
echo $sgene_pc | tee S_GENE_PC
[2024-01-10 14:03:01,90] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.pangolin4:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-pangolin4/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-pangolin4/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-pangolin4:/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-pangolin4:delegated \
us-docker.pkg.dev/general-theiagen/staphb/pangolin@sha256:54b03e235ee04f007417cb934d1a263ce73bbd16aa82134efa680bd5d856c38d /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-pangolin4/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-pangolin4/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-pangolin4/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:03:01,93] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.sc2_gene_coverage:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage:/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage:delegated \
us-docker.pkg.dev/general-theiagen/staphb/samtools@sha256:ef92cb5e490c75e66e89ab0a113865839e6954435d363af400eaa27177ff7c91 /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-sc2_gene_coverage/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:03:01,93] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.consensus_qc:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc:/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc:delegated \
us-docker.pkg.dev/general-theiagen/theiagen/utility@sha256:b40130a48b94d64db4520ad8580c72cbcb8ea16bba486bc315096c2c3ec8cace /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus_qc/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:03:01,95] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage:/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage:delegated \
us-docker.pkg.dev/general-theiagen/staphb/samtools@sha256:ef92cb5e490c75e66e89ab0a113865839e6954435d363af400eaa27177ff7c91 /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:03:01,95] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage_primtrim:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim:/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim:delegated \
us-docker.pkg.dev/general-theiagen/staphb/samtools@sha256:ef92cb5e490c75e66e89ab0a113865839e6954435d363af400eaa27177ff7c91 /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-stats_n_coverage_primtrim/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:03:02,12] [warn] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.nextclade:NA:1]: Unrecognized runtime attribute keys: disk, dx_instance_type, disks, cpu, memory
[2024-01-10 14:03:02,13] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade/inputs/-1423075165/clearlabs.medaka.consensus.fasta -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus/execution/clearlabs.medaka.consensus.fasta: Operation not permitted
[2024-01-10 14:03:02,13] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.nextclade:NA:1]: NEXTCLADE_VERSION="$(nextclade --version)"
echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION
nextclade dataset get --name="sars-cov-2" --reference="MN908947" --tag="2023-09-21T12:00:00Z" -o nextclade_dataset_dir --verbose
set -e
nextclade run \
--input-dataset=nextclade_dataset_dir/ \
\
\
\
\
\
\
--output-json "clearlabs.medaka.consensus".nextclade.json \
--output-tsv "clearlabs.medaka.consensus".nextclade.tsv \
--output-tree "clearlabs.medaka.consensus".nextclade.auspice.json \
--output-all=. \
"/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade/inputs/-1423075165/clearlabs.medaka.consensus.fasta"
[2024-01-10 14:03:02,15] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.nextclade:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade:/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade:delegated \
us-docker.pkg.dev/general-theiagen/nextstrain/nextclade@sha256:f4440021c7f854433bd56433024f38199de27e0ef657e11d5fb28aad4f265a48 /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:03:06,33] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage_primtrim:NA:1]: job id: 46135
[2024-01-10 14:03:06,33] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.pangolin4:NA:1]: job id: 46101
[2024-01-10 14:03:06,33] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.nextclade:NA:1]: job id: 46161
[2024-01-10 14:03:06,34] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage:NA:1]: job id: 46132
[2024-01-10 14:03:06,34] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage:NA:1]: Status change from - to Done
[2024-01-10 14:03:06,34] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.stats_n_coverage_primtrim:NA:1]: Status change from - to Done
[2024-01-10 14:03:06,34] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.sc2_gene_coverage:NA:1]: job id: 46114
[2024-01-10 14:03:06,34] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.sc2_gene_coverage:NA:1]: Status change from - to Done
[2024-01-10 14:03:06,35] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.consensus_qc:NA:1]: job id: 46120
[2024-01-10 14:03:06,35] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.pangolin4:NA:1]: Status change from - to WaitingForReturnCode
[2024-01-10 14:03:06,35] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.nextclade:NA:1]: Status change from - to Done
[2024-01-10 14:03:06,35] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.consensus_qc:NA:1]: Status change from - to Done
[2024-01-10 14:03:08,29] [info] WorkflowExecutionActor-7958a2eb-d020-46f1-83d0-6873d5a32972 [7958a2eb]: Starting theiacov_clearlabs.nextclade_output_parser, theiacov_clearlabs.vadr
[2024-01-10 14:03:11,53] [info] Assigned new job execution tokens to the following groups: 7958a2eb: 2
[2024-01-10 14:03:11,76] [warn] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.vadr:NA:1]: Unrecognized runtime attribute keys: dx_instance_type, cpu, memory
[2024-01-10 14:03:11,77] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr/inputs/-1423075165/clearlabs.medaka.consensus.fasta -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-consensus/execution/clearlabs.medaka.consensus.fasta: Operation not permitted
[2024-01-10 14:03:11,78] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.vadr:NA:1]: set -e
if [ 23493 -gt 10000 ]; then
# remove terminal ambiguous nucleotides
/opt/vadr/vadr/miniscripts/fasta-trim-terminal-ambigs.pl \
"/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr/inputs/-1423075165/clearlabs.medaka.consensus.fasta" \
--minlen 50 \
--maxlen 30000 \
> "clearlabs.medaka.consensus_trimmed.fasta"
# run VADR
# --split and --cpu must be used in conjunction
v-annotate.pl \
--split --cpu 2 \
--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta \
"clearlabs.medaka.consensus_trimmed.fasta" \
"clearlabs.medaka.consensus"
# package everything for output
tar -C "clearlabs.medaka.consensus" -czvf "clearlabs.medaka.consensus.vadr.tar.gz" .
# package up FASTA files into zip file for output. Note: this will work whether the --out_allfasta flag is included or not (there are just more when the option is used)
mkdir -v vadr_fasta_files
cp -v clearlabs.medaka.consensus/*.fa vadr_fasta_files
zip clearlabs.medaka.consensus_vadr-fasta-files.zip vadr_fasta_files/*.fa
# prep alerts into a tsv file for parsing
cut -f 5 "clearlabs.medaka.consensus/clearlabs.medaka.consensus.vadr.alt.list" | tail -n +2 > "clearlabs.medaka.consensus.vadr.alerts.tsv"
cat "clearlabs.medaka.consensus.vadr.alerts.tsv" | wc -l > NUM_ALERTS
else
echo "VADR skipped due to poor assembly; assembly length (unambiguous) = 23493" > NUM_ALERTS
fi
[2024-01-10 14:03:11,80] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.vadr:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr:/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr:delegated \
us-docker.pkg.dev/general-theiagen/staphb/vadr@sha256:f15a73a6f6e0802c3a9c05e7c602740e7029a2e1ccc14aa6935b459be87c98d7 /cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-vadr/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:03:11,97] [warn] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.nextclade_output_parser:NA:1]: Unrecognized runtime attribute keys: disk, dx_instance_type, disks, cpu, memory
[2024-01-10 14:03:11,98] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade_output_parser/inputs/-133504526/clearlabs.medaka.consensus.nextclade.tsv -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade/execution/clearlabs.medaka.consensus.nextclade.tsv: Operation not permitted
[2024-01-10 14:03:11,98] [info] BackgroundConfigAsyncJobExecutionActor [7958a2ebtheiacov_clearlabs.nextclade_output_parser:NA:1]: # Set WDL input variable to input.tsv file
cat "/cromwell-executions/theiacov_clearlabs/7958a2eb-d020-46f1-83d0-6873d5a32972/call-nextclade_output_parser/inputs/-133504526/clearlabs.medaka.consensus.nextclade.tsv" > input.tsv
touch TAMIFLU_AASUBS
# Parse outputs using python3
python3 <
✅ theiacov_clearlabs via cromwell
also succeeded: the exit code was 0 and cromwell reported success.
My terminal scrollback buffer didn't capture every line of cromwell's output,
but here's the command:
$ cromwell run -i ./tests/inputs/theiacov/wf_theiacov_illumina_pe.json -m metadata.json ./workflows/theiacov/wf_theiacov_illumina_pe.wdl
/home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-ivar_consensus/ivar_consensus/c1c33d82-59e0-459f-8f06-9a7d398dfd47/call-consensus/execution/SRR13687078.ivar.consensus.fasta: Operation not permitted
[2024-01-10 14:08:54,98] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.nextclade:NA:1]: NEXTCLADE_VERSION="$(nextclade --version)"
echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION
nextclade dataset get --name="sars-cov-2" --reference="MN908947" --tag="2023-09-21T12:00:00Z" -o nextclade_dataset_dir --verbose
set -e
nextclade run \
--input-dataset=nextclade_dataset_dir/ \
\
\
\
\
\
\
--output-json "SRR13687078.ivar.consensus".nextclade.json \
--output-tsv "SRR13687078.ivar.consensus".nextclade.tsv \
--output-tree "SRR13687078.ivar.consensus".nextclade.auspice.json \
--output-all=. \
"/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade/inputs/1958973516/SRR13687078.ivar.consensus.fasta"
[2024-01-10 14:08:55,00] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.nextclade:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade:/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade:delegated \
nextstrain/nextclade@sha256:f4440021c7f854433bd56433024f38199de27e0ef657e11d5fb28aad4f265a48 /cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:08:59,39] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.sc2_gene_coverage:NA:1]: job id: 53041
[2024-01-10 14:08:59,39] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.consensus_qc:NA:1]: job id: 53121
[2024-01-10 14:08:59,39] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.nextclade:NA:1]: job id: 53174
[2024-01-10 14:08:59,39] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.sc2_gene_coverage:NA:1]: Status change from - to Done
[2024-01-10 14:08:59,39] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.pangolin4:NA:1]: job id: 53140
[2024-01-10 14:08:59,39] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.consensus_qc:NA:1]: Status change from - to Done
[2024-01-10 14:08:59,39] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.nextclade:NA:1]: Status change from - to Done
[2024-01-10 14:08:59,39] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.pangolin4:NA:1]: Status change from - to WaitingForReturnCode
[2024-01-10 14:09:00,81] [info] WorkflowExecutionActor-f226a562-09f6-40c5-8a8e-81c591f0be48 [f226a562]: Starting theiacov_illumina_pe.vadr, theiacov_illumina_pe.nextclade_output_parser
[2024-01-10 14:09:04,54] [info] Assigned new job execution tokens to the following groups: f226a562: 2
[2024-01-10 14:09:04,70] [warn] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.vadr:NA:1]: Unrecognized runtime attribute keys: dx_instance_type, cpu, memory
[2024-01-10 14:09:04,72] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr/inputs/1958973516/SRR13687078.ivar.consensus.fasta -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-ivar_consensus/ivar_consensus/c1c33d82-59e0-459f-8f06-9a7d398dfd47/call-consensus/execution/SRR13687078.ivar.consensus.fasta: Operation not permitted
[2024-01-10 14:09:04,72] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.vadr:NA:1]: set -e
if [ 24908 -gt 10000 ]; then
# remove terminal ambiguous nucleotides
/opt/vadr/vadr/miniscripts/fasta-trim-terminal-ambigs.pl \
"/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr/inputs/1958973516/SRR13687078.ivar.consensus.fasta" \
--minlen 50 \
--maxlen 30000 \
> "SRR13687078.ivar.consensus_trimmed.fasta"
# run VADR
# --split and --cpu must be used in conjunction
v-annotate.pl \
--split --cpu 2 \
--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta \
"SRR13687078.ivar.consensus_trimmed.fasta" \
"SRR13687078.ivar.consensus"
# package everything for output
tar -C "SRR13687078.ivar.consensus" -czvf "SRR13687078.ivar.consensus.vadr.tar.gz" .
# package up FASTA files into zip file for output. Note: this will work whether the --out_allfasta flag is included or not (there are just more when the option is used)
mkdir -v vadr_fasta_files
cp -v SRR13687078.ivar.consensus/*.fa vadr_fasta_files
zip SRR13687078.ivar.consensus_vadr-fasta-files.zip vadr_fasta_files/*.fa
# prep alerts into a tsv file for parsing
cut -f 5 "SRR13687078.ivar.consensus/SRR13687078.ivar.consensus.vadr.alt.list" | tail -n +2 > "SRR13687078.ivar.consensus.vadr.alerts.tsv"
cat "SRR13687078.ivar.consensus.vadr.alerts.tsv" | wc -l > NUM_ALERTS
else
echo "VADR skipped due to poor assembly; assembly length (unambiguous) = 24908" > NUM_ALERTS
fi
[2024-01-10 14:09:04,73] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.vadr:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr:/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr:delegated \
us-docker.pkg.dev/general-theiagen/staphb/vadr@sha256:f15a73a6f6e0802c3a9c05e7c602740e7029a2e1ccc14aa6935b459be87c98d7 /cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr/execution/docker_cid`)
# remove the container after waiting
docker rm `cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-vadr/execution/docker_cid`
# return exit code
exit $rc
[2024-01-10 14:09:04,84] [warn] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.nextclade_output_parser:NA:1]: Unrecognized runtime attribute keys: disk, dx_instance_type, disks, cpu, memory
[2024-01-10 14:09:04,84] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade_output_parser/inputs/210698167/SRR13687078.ivar.consensus.nextclade.tsv -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade/execution/SRR13687078.ivar.consensus.nextclade.tsv: Operation not permitted
[2024-01-10 14:09:04,85] [info] BackgroundConfigAsyncJobExecutionActor [f226a562theiacov_illumina_pe.nextclade_output_parser:NA:1]: # Set WDL input variable to input.tsv file
cat "/cromwell-executions/theiacov_illumina_pe/f226a562-09f6-40c5-8a8e-81c591f0be48/call-nextclade_output_parser/inputs/210698167/SRR13687078.ivar.consensus.nextclade.tsv" > input.tsv
touch TAMIFLU_AASUBS
# Parse outputs using python3
python3 <
✅ for theiacov_illumina_pe run via cromwell
TheiaCoV_Illumina_SE run via cromwell:
cromwell command used:
cromwell run -i ./tests/inputs/theiacov/wf_theiacov_illumina_se.json -m metadata.json ./workflows/theiacov/wf_theiacov_illumina_se.wdl
/home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-ivar_consensus/ivar_consensus/3700ee04-0c79-46e9-80b1-b086abe16c34/call-consensus/execution/ERR6319327.ivar.consensus.fasta: Operation not permitted
[2024-01-10 14:26:41,29] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.nextclade:NA:1]: NEXTCLADE_VERSION="$(nextclade --version)"
echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION
nextclade dataset get --name="sars-cov-2" --reference="MN908947" --tag="2023-09-21T12:00:00Z" -o nextclade_dataset_dir --verbose
set -e
nextclade run \
--input-dataset=nextclade_dataset_dir/ \
\
\
\
\
\
\
--output-json "ERR6319327.ivar.consensus".nextclade.json \
--output-tsv "ERR6319327.ivar.consensus".nextclade.tsv \
--output-tree "ERR6319327.ivar.consensus".nextclade.auspice.json \
--output-all=. \
"/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade/inputs/1765922037/ERR6319327.ivar.consensus.fasta"
[2024-01-10 14:26:41,30] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.nextclade:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade:/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade:delegated \
us-docker.pkg.dev/general-theiagen/nextstrain/nextclade@sha256:f4440021c7f854433bd56433024f38199de27e0ef657e11d5fb28aad4f265a48 /cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade/execution/docker_cid)
# remove the container after waiting
docker rm cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade/execution/docker_cid
# return exit code
exit $rc
[2024-01-10 14:26:45,76] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.pangolin4:NA:1]: job id: 60138
[2024-01-10 14:26:45,76] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.nextclade:NA:1]: job id: 60191
[2024-01-10 14:26:45,77] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.nextclade:NA:1]: Status change from - to Done
[2024-01-10 14:26:45,77] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.consensus_qc:NA:1]: job id: 60141
[2024-01-10 14:26:45,77] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.sc2_gene_coverage:NA:1]: job id: 60066
[2024-01-10 14:26:45,77] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.sc2_gene_coverage:NA:1]: Status change from - to Done
[2024-01-10 14:26:45,77] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.pangolin4:NA:1]: Status change from - to WaitingForReturnCode
[2024-01-10 14:26:45,77] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.consensus_qc:NA:1]: Status change from - to Done
[2024-01-10 14:26:47,10] [info] WorkflowExecutionActor-4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1 [4cd7fc7e]: Starting theiacov_illumina_se.vadr, theiacov_illumina_se.nextclade_output_parser
[2024-01-10 14:26:50,95] [info] Assigned new job execution tokens to the following groups: 4cd7fc7e: 2
[2024-01-10 14:26:51,11] [warn] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.vadr:NA:1]: Unrecognized runtime attribute keys: dx_instance_type, cpu, memory
[2024-01-10 14:26:51,11] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr/inputs/1765922037/ERR6319327.ivar.consensus.fasta -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-ivar_consensus/ivar_consensus/3700ee04-0c79-46e9-80b1-b086abe16c34/call-consensus/execution/ERR6319327.ivar.consensus.fasta: Operation not permitted
[2024-01-10 14:26:51,12] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.vadr:NA:1]: set -e
if [ 29377 -gt 10000 ]; then
# remove terminal ambiguous nucleotides
/opt/vadr/vadr/miniscripts/fasta-trim-terminal-ambigs.pl \
"/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr/inputs/1765922037/ERR6319327.ivar.consensus.fasta" \
--minlen 50 \
--maxlen 30000 \
> "ERR6319327.ivar.consensus_trimmed.fasta"
# run VADR
# --split and --cpu must be used in conjuction
v-annotate.pl \
--split --cpu 2 \
--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta \
"ERR6319327.ivar.consensus_trimmed.fasta" \
"ERR6319327.ivar.consensus"
# package everything for output
tar -C "ERR6319327.ivar.consensus" -czvf "ERR6319327.ivar.consensus.vadr.tar.gz" .
# package up FASTA files into zip file for output. Note: this will work whether the --out_allfasta flag is included or not (there are just more when the option is used)
mkdir -v vadr_fasta_files
cp -v ERR6319327.ivar.consensus/*.fa vadr_fasta_files
zip ERR6319327.ivar.consensus_vadr-fasta-files.zip vadr_fasta_files/*.fa
# prep alerts into a tsv file for parsing
cut -f 5 "ERR6319327.ivar.consensus/ERR6319327.ivar.consensus.vadr.alt.list" | tail -n +2 > "ERR6319327.ivar.consensus.vadr.alerts.tsv"
cat "ERR6319327.ivar.consensus.vadr.alerts.tsv" | wc -l > NUM_ALERTS
else
echo "VADR skipped due to poor assembly; assembly length (unambiguous) = 29377" > NUM_ALERTS
fi
[2024-01-10 14:26:51,13] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.vadr:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr:/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr:delegated \
us-docker.pkg.dev/general-theiagen/staphb/vadr@sha256:f15a73a6f6e0802c3a9c05e7c602740e7029a2e1ccc14aa6935b459be87c98d7 /cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr/execution/docker_cid)
# remove the container after waiting
docker rm cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-vadr/execution/docker_cid
# return exit code
exit $rc
[2024-01-10 14:26:51,35] [warn] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.nextclade_output_parser:NA:1]: Unrecognized runtime attribute keys: disk, dx_instance_type, disks, cpu, memory
[2024-01-10 14:26:51,35] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade_output_parser/inputs/-990009139/ERR6319327.ivar.consensus.nextclade.tsv -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade/execution/ERR6319327.ivar.consensus.nextclade.tsv: Operation not permitted
[2024-01-10 14:26:51,36] [info] BackgroundConfigAsyncJobExecutionActor [4cd7fc7etheiacov_illumina_se.nextclade_output_parser:NA:1]: # Set WDL input variable to input.tsv file
cat "/cromwell-executions/theiacov_illumina_se/4cd7fc7e-48f0-4bc3-98ba-ca96aee775e1/call-nextclade_output_parser/inputs/-990009139/ERR6319327.ivar.consensus.nextclade.tsv" > input.tsv
touch TAMIFLU_AASUBS
# Parse outputs using python3
python3 <
✅ theiacov_illumina_se run via cromwell
was successful and the exit code was 0.
One more to go - theiacov_ont
theiacov_ont via cromwell
command used:
$ cromwell run -i ./tests/inputs/theiacov/wf_theiacov_ont.json -m metadata.json ./workflows/theiacov/wf_theiacov_ont.wdl
/home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-consensus/execution/ont.medaka.consensus.fasta: Operation not permitted
[2024-01-10 14:37:59,09] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nextclade:NA:1]: NEXTCLADE_VERSION="$(nextclade --version)"
echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION
nextclade dataset get --name="sars-cov-2" --reference="MN908947" --tag="2023-09-21T12:00:00Z" -o nextclade_dataset_dir --verbose
set -e
nextclade run \
--input-dataset=nextclade_dataset_dir/ \
\
\
\
\
\
\
--output-json "ont.medaka.consensus".nextclade.json \
--output-tsv "ont.medaka.consensus".nextclade.tsv \
--output-tree "ont.medaka.consensus".nextclade.auspice.json \
--output-all=. \
"/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade/inputs/-1676033136/ont.medaka.consensus.fasta"
[2024-01-10 14:37:59,12] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nextclade:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade:/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade:delegated \
nextstrain/nextclade@sha256:f4440021c7f854433bd56433024f38199de27e0ef657e11d5fb28aad4f265a48 /cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade/execution/docker_cid)
# remove the container after waiting
docker rm cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade/execution/docker_cid
# return exit code
exit $rc
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.consensus_qc:NA:1]: job id: 65354
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.stats_n_coverage_primtrim:NA:1]: job id: 65360
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.pangolin4:NA:1]: job id: 65388
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nextclade:NA:1]: job id: 65407
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.sc2_gene_coverage:NA:1]: job id: 65349
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.stats_n_coverage:NA:1]: job id: 65380
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.consensus_qc:NA:1]: Status change from - to Done
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nextclade:NA:1]: Status change from - to Done
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.stats_n_coverage_primtrim:NA:1]: Status change from - to Done
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.pangolin4:NA:1]: Status change from - to WaitingForReturnCode
[2024-01-10 14:38:03,52] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.stats_n_coverage:NA:1]: Status change from - to Done
[2024-01-10 14:38:03,53] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.sc2_gene_coverage:NA:1]: Status change from - to Done
[2024-01-10 14:38:05,29] [info] WorkflowExecutionActor-a74105f8-2597-4528-bb3c-9eb852973d92 [a74105f8]: Starting theiacov_ont.nanoplot_raw, theiacov_ont.vadr, theiacov_ont.nextclade_output_parser, theiacov_ont.nanoplot_clean
[2024-01-10 14:38:08,66] [info] Assigned new job execution tokens to the following groups: a74105f8: 4
[2024-01-10 14:38:08,91] [warn] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.vadr:NA:1]: Unrecognized runtime attribute keys: dx_instance_type, cpu, memory
[2024-01-10 14:38:08,91] [warn] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nanoplot_raw:NA:1]: Unrecognized runtime attribute keys: preemptible, disk, disks, cpu, memory
[2024-01-10 14:38:08,91] [warn] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nanoplot_clean:NA:1]: Unrecognized runtime attribute keys: preemptible, disk, disks, cpu, memory
[2024-01-10 14:38:08,91] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr/inputs/-1676033136/ont.medaka.consensus.fasta -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-consensus/execution/ont.medaka.consensus.fasta: Operation not permitted
[2024-01-10 14:38:08,92] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_clean/inputs/1986200811/artic_ncov2019_ont.fastq -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-read_qc_trim/read_QC_trim_ont/dbaaa31c-b302-4d0d-9600-5d4ddf59c076/call-read_filtering/execution/artic_ncov2019_ont.fastq: Operation not permitted
[2024-01-10 14:38:08,92] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nanoplot_raw:NA:1]: # get version
NanoPlot --version | tee "VERSION"
# run nanoplot
# --prefix for output file tag
# --threads for number of threads allowed
# --N50 to display N50 mark in read length histogram
# --loglength to show logarithmic scaling of lengths
# --tsv_stats to output the stats file in TSV format
# --maxlength to hide reads longer than this
NanoPlot \
--fastq /cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_raw/inputs/1032112200/ont.fastq.gz \
--prefix "ont_" \
--threads 4 \
--N50 \
--loglength \
--tsv_stats \
--maxlength 100000
# grep read statistics from tsv stats file
grep "number_of_reads" ont_NanoStats.txt | cut -f 2 | tee NUMBER_OF_READS
NUM_BASES=$(grep "number_of_bases" ont_NanoStats.txt | cut -f 2 | tee NUMBER_OF_BASES)
grep "mean_read_length" ont_NanoStats.txt | cut -f 2 | tee MEAN_READ_LENGTH
grep "mean_qual" ont_NanoStats.txt | cut -f 2 | tee MEAN_QUAL
# estimate coverage
# using math: C = N / G where N is number of bases, and G is estimated genome size
python3 -c "print(round(${NUM_BASES} / 29903, 2))" | tee EST_COVERAGE
[2024-01-10 14:38:08,92] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.vadr:NA:1]: set -e
if [ 27125 -gt 10000 ]; then
# remove terminal ambiguous nucleotides
/opt/vadr/vadr/miniscripts/fasta-trim-terminal-ambigs.pl \
"/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr/inputs/-1676033136/ont.medaka.consensus.fasta" \
--minlen 50 \
--maxlen 30000 \
> "ont.medaka.consensus_trimmed.fasta"
# run VADR
# --split and --cpu must be used in conjuction
v-annotate.pl \
--split --cpu 2 \
--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta \
"ont.medaka.consensus_trimmed.fasta" \
"ont.medaka.consensus"
# package everything for output
tar -C "ont.medaka.consensus" -czvf "ont.medaka.consensus.vadr.tar.gz" .
# package up FASTA files into zip file for output. Note: this will work whether the --out_allfasta flag is included or not (there are just more when the option is used)
mkdir -v vadr_fasta_files
cp -v ont.medaka.consensus/*.fa vadr_fasta_files
zip ont.medaka.consensus_vadr-fasta-files.zip vadr_fasta_files/*.fa
# prep alerts into a tsv file for parsing
cut -f 5 "ont.medaka.consensus/ont.medaka.consensus.vadr.alt.list" | tail -n +2 > "ont.medaka.consensus.vadr.alerts.tsv"
cat "ont.medaka.consensus.vadr.alerts.tsv" | wc -l > NUM_ALERTS
else
echo "VADR skipped due to poor assembly; assembly length (unambiguous) = 27125" > NUM_ALERTS
fi
[2024-01-10 14:38:08,93] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.vadr:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr:/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr:delegated \
us-docker.pkg.dev/general-theiagen/staphb/vadr@sha256:f15a73a6f6e0802c3a9c05e7c602740e7029a2e1ccc14aa6935b459be87c98d7 /cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr/execution/docker_cid)
# remove the container after waiting
docker rm cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-vadr/execution/docker_cid
# return exit code
exit $rc
[2024-01-10 14:38:08,95] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nanoplot_raw:NA:1]: executing: # make sure there is no preexisting Docker CID file
rm -f /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_raw/execution/docker_cid
# run as in the original configuration without --rm flag (will remove later)
docker run \
--cidfile /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_raw/execution/docker_cid \
-i \
\
--entrypoint /bin/bash \
-v /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_raw:/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_raw:delegated \
us-docker.pkg.dev/general-theiagen/staphb/nanoplot@sha256:68edf4b3e515b179fa12ec8405ba196af204625e2f30b7e5512427f773c81d79 /cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_raw/execution/script
# get the return code (working even if the container was detached)
rc=$(docker wait cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_raw/execution/docker_cid)
# remove the container after waiting
docker rm cat /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_raw/execution/docker_cid
# return exit code
exit $rc
[2024-01-10 14:38:08,96] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nanoplot_clean:NA:1]: # get version
NanoPlot --version | tee "VERSION"
# run nanoplot
# --prefix for output file tag
# --threads for number of threads allowed
# --N50 to display N50 mark in read length histogram
# --loglength to show logarithmic scaling of lengths
# --tsv_stats to output the stats file in TSV format
# --maxlength to hide reads longer than this
NanoPlot \
--fastq /cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nanoplot_clean/inputs/1986200811/artic_ncov2019_ont.fastq \
--prefix "ont_" \
--threads 4 \
--N50 \
--loglength \
--tsv_stats \
--maxlength 100000
# grep read statistics from tsv stats file
grep "number_of_reads" ont_NanoStats.txt | cut -f 2 | tee NUMBER_OF_READS
NUM_BASES=$(grep "number_of_bases" ont_NanoStats.txt | cut -f 2 | tee NUMBER_OF_BASES)
grep "mean_read_length" ont_NanoStats.txt | cut -f 2 | tee MEAN_READ_LENGTH
grep "mean_qual" ont_NanoStats.txt | cut -f 2 | tee MEAN_QUAL
# estimate coverage
# using math: C = N / G where N is number of bases, and G is estimated genome size
python3 -c "print(round(${NUM_BASES} / 29903, 2))" | tee EST_COVERAGE
[2024-01-10 14:38:08,97] [warn] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nextclade_output_parser:NA:1]: Unrecognized runtime attribute keys: disk, dx_instance_type, disks, cpu, memory
[2024-01-10 14:38:08,98] [warn] Localization via hard link has failed: /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade_output_parser/inputs/-386462497/ont.medaka.consensus.nextclade.tsv -> /home/curtis_kapsak/github/public_health_bioinformatics/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade/execution/ont.medaka.consensus.nextclade.tsv: Operation not permitted
[2024-01-10 14:38:08,98] [info] BackgroundConfigAsyncJobExecutionActor [a74105f8theiacov_ont.nextclade_output_parser:NA:1]: # Set WDL input variable to input.tsv file
cat "/cromwell-executions/theiacov_ont/a74105f8-2597-4528-bb3c-9eb852973d92/call-nextclade_output_parser/inputs/-386462497/ont.medaka.consensus.nextclade.tsv" > input.tsv
touch TAMIFLU_AASUBS
# Parse outputs using python3
python3 <
✅ theiacov_ont via cromwell
was successful and exit code was 0.
That wraps up all the cromwell tests that failed on the GitHub Actions runner VMs (presumably due to disk space limitations) but passed in my local CI environment.
We are good to merge 🎉
Closes
:hammer_and_wrench: Changes Being Made
Impacted Workflows/Tasks
:brain: Context and Rationale
:clipboard: Workflow/Task Steps
Inputs
Outputs
Impacted Outputs
:test_tube: Testing
Locally
Terra
Scenarios for Reviewer to Test
:microscope: Quality checks
Pull Request (PR) checklist: