Open njbernstein opened 4 years ago
Adding more detail to this. If someone has time to look at it, I would greatly appreciate it!
@cjllanwarne
Workflow:
# Cut-down Mutect2 workflow: its only call is renameBamIndex, which receives the
# tumor reads and whose renamed BAM is exposed as the workflow output.
workflow Mutect2 {
    input {
        # Alignment file handed to renameBamIndex as `bam`.
        File tumor_reads

        # Resource defaults; none of these are referenced by the call below.
        Int small_task_cpu = 2
        Int small_task_mem = 4
        Int small_task_disk = 100
        Int boot_disk_size = 12
        Int learn_read_orientation_mem = 8000
        Int filter_alignment_artifacts_mem = 9000

        # Last-resort knob: extra disk for every task when the data is ill-behaved.
        Int? emergency_extra_disk

        # Multipliers applied to input sizes so that enough disk is reserved for outputs:
        #   large -> BAMs / WGS VCFs
        #   small -> metrics / other VCFs
        Float large_input_to_output_multiplier = 2.25
        Float small_input_to_output_multiplier = 2.0
        Float cram_to_bam_multiplier = 6.0
    }

    Int preemptible_or_default = 2
    Int max_retries_or_default = 2

    # Sizes used for dynamic disk sizing (fixed literals in this version).
    Int ref_size = 10
    Int tumor_only_reads_size = 10
    Int tumor_reads_size = tumor_only_reads_size + 1
    Int gnomad_vcf_size = 1
    Int normal_reads_size = 1

    # When no tar is provided, the task downloads one from the Broad's FTP server.
    Int funco_tar_size = 100
    Int gatk_override_size = 0

    # Padding added to every task; raise it if every call systematically needs more disk.
    Int disk_pad = 10 + gatk_override_size

    # Output file names *without* the .vcf extension. Both are hard-coded here;
    # the original note calls this a hacky way to strip either .bam or .cram.
    String output_basename = "SRR2619134"
    String output_fullname = "SRR2619134"

    Int tumor_cram_to_bam_disk = 10
    Int normal_cram_to_bam_disk = 10

    # An alignment file without a suffix is assumed to be a BAM:
    # rename it to <name>.bam and index it.
    call renameBamIndex {
        input:
            name = output_basename,
            bam = tumor_reads,
            disk_size = tumor_cram_to_bam_disk
    }

    output {
        # NOTE(review): despite its name, this output is the renamed BAM, not a VCF.
        File filtered_vcf = renameBamIndex.output_bam
    }
}
# Copies an alignment file to <name>.bam, indexes it with samtools, and exposes
# both the BAM and a <name>.bai copy of the index.
#
# Inputs:
#   name      - basename used for the renamed BAM and its index
#   bam       - alignment file to copy
#   disk_size - size of the local HDD disk requested in the runtime section
#   mem       - optional memory setting; multiplied by 1000 to obtain the MB
#               value requested in the runtime section (defaults to 6000 MB)
#   sra, ngc  - declared but not referenced anywhere in this task
task renameBamIndex {
    input {
        String name
        File bam
        Int disk_size
        Int? mem
        String? sra
        File? ngc
    }

    # `mem` is optional, so unwrap it before doing arithmetic instead of
    # multiplying an Int? directly. An unset `mem` still yields 6000.
    Int machine_mem = select_first([mem, 6]) * 1000

    # The second cp overwrites the result of the first with the file found at
    # /cromwell_root/<name>/<name>.
    # NOTE(review): that hard-coded path looks like a workaround for the SRA
    # mount location -- confirm before reusing this task elsewhere.
    command {
        echo ~{bam}
        cp ~{bam} ~{name}.bam
        cp /cromwell_root/~{name}/~{name} ~{name}.bam
        samtools index -b ~{name}.bam
        cp ~{name}.bam.bai ~{name}.bai
    }

    runtime {
        docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
        memory: machine_mem + " MB"
        disks: "local-disk " + disk_size + " HDD"
    }

    output {
        File output_bam = "~{name}.bam"
        File output_bai = "~{name}.bai"
    }
}
input:
{
"Mutect2.tumor_reads": "sra://SRR2619134/SRR2619134"
}
Cromwell config:
include required(classpath("application"))
// Google settings: the application name plus the single auth entry
// ("application-default") that the other stanzas refer to by name.
google {
  application-name = "cromwell"

  auths = [
    {
      name = "application-default"
      scheme = "application_default"
    }
  ]
}
// Top-level filesystem definitions. Only SRA is configured here: its path
// builder factory class, the docker image to use, and the local .ngc file.
filesystems {
  sra {
    class = "cromwell.filesystems.sra.SraPathBuilderFactory"
    docker-image = "fusera/fusera:alpine"
    ngc = "/home/nicholas/.sra/prj_26387_D28121.ngc"
  }
}
// Engine-level filesystems: GCS only, using the "application-default" auth
// declared in the `google` stanza. Written as a dotted path, which HOCON
// treats the same as the nested engine { filesystems { gcs { ... } } } form.
engine.filesystems.gcs.auth = "application-default"
// Backend selection: a single provider, PAPIv2, which is also the default.
backend {
  default = PAPIv2

  providers {
    PAPIv2 {
      actor-factory = "cromwell.backend.google.pipelines.v2alpha1.PipelinesApiLifecycleActorFactory"

      config {
        // Google project
        project = "calico-uk-biobank"
        compute-service-account = "default"

        // Base bucket for workflow executions
        // NOTE(review): no "gs://" prefix on this value -- confirm that is intended.
        root = "nicholas-b-test"

        // Concurrency and API rate settings
        concurrent-job-limit = 10000
        max-concurrent-workflows = 10000
        genomics-api-queries-per-100-seconds = 10000
        max-workflow-launch-count = 2000

        // Polling for completion backs-off gradually for slower-running jobs.
        // This is the maximum polling interval (in seconds):
        maximum-polling-interval = 300

        // Optional Dockerhub Credentials. Can be used to access private docker images.
        dockerhub {
          // account = ""
          // token = ""
        }

        genomics {
          // A reference to an auth defined in the `google` stanza at the top. This auth is used to create
          // Pipelines and manipulate auth JSONs.
          auth = "application-default"

          // Endpoint for APIs, no reason to change this unless directed by Google.
          endpoint-url = "https://genomics.googleapis.com/"

          // NOTE(review): presumably required so SRA inputs can be FUSE-mounted -- confirm.
          enable-fuse = true
        }

        filesystems {
          // Empty object: no backend-specific SRA settings are given here.
          sra {}

          gcs {
            // A reference to a potentially different auth for manipulating files via engine functions.
            auth = "application-default"
          }
        }
      }
    }
  }
}
// Per-type read limits under system.input-read-limits.
// NOTE(review): presumably byte limits for the corresponding read_* functions --
// confirm units and semantics against the Cromwell documentation.
system {
  input-read-limits {
    // Small scalar types
    bool = 7
    int = 19
    float = 50

    // Text and structured types, all raised to the same value
    lines = 12800000
    string = 12800000
    json = 12800000
    tsv = 12800000
    map = 12800000
    object = 12800000
  }
}
@cjllanwarne helped me with this issue, but it's led to new ones: https://github.com/broadinstitute/cromwell/issues/5793
Cromwell tries to chmod the mounted SRA directory, which is not allowed. Code: https://github.com/broadinstitute/cromwell/blob/5c8f932b6e1a5706286913e21c78dc296dd5c79c/supportedBackends/google/pipelines/v2alpha1/src/main/scala/cromwell/backend/google/pipelines/v2alpha1/api/ContainerSetup.scala Error: