datirium / scidap

Scientific Data Analysis Platform
https://scidap.com/
Apache License 2.0
0 stars 3 forks source link

Automatically generate indices for basic genomes #1

Closed michael-kotliar closed 4 years ago

michael-kotliar commented 5 years ago

After setting up a new system, all basic genome indices (hg19, hg38, mm10, etc.) should be available by default.

To achieve this, submit the following jobs to be run by the genome-indices.cwl pipeline:

mm10

{
  "fasta": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/mm10/fasta/mm10.fa",
    "token": ""
  },
  "fasta_ribosomal": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/mm10/fasta/ribo.fa",
    "token": ""
  },
  "fasta_mitochondrial": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/mm10/fasta/chrM.fa",
    "token": ""
  },
  "input_annotation_gtf": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/mm10/annotation/refgene.gtf",
    "token": ""
  },
  "annotation_tab": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/mm10/annotation/refgene.tsv",
    "token": ""
  },
  "threads": 4,
  "limit_genome_generate_ram": 20000000000,
  "effective_genome_size": "2.4e9",
  "genome_sa_index_n_bases": 14,
  "genome_sa_index_n_bases_mitochondrial": 6,
  "genome_sa_sparse_d": 2,
  "genome": "mm10",
  "genome_label": "Mus musculus (mm10)",
  "genome_description": "Genome Reference Consortium Mouse Build 38 (mm10). Strain: C57BL/6J. 2012/01/09",
  "genome_details": "https://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.20/"
}

hg19

{
  "fasta": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg19/fasta/hg19.fa",
    "token": ""
  },
  "fasta_ribosomal": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg19/fasta/ribo.fa",
    "token": ""
  },
  "fasta_mitochondrial": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg19/fasta/chrM.fa",
    "token": ""
  },
  "input_annotation_gtf": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg19/annotation/refgene.gtf",
    "token": ""
  },
  "annotation_tab": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg19/annotation/refgene.tsv",
    "token": ""
  },
  "threads": 4,
  "limit_genome_generate_ram": 20000000000,
  "effective_genome_size": "2.73e9",
  "genome_sa_index_n_bases": 14,
  "genome_sa_index_n_bases_mitochondrial": 6,
  "genome_sa_sparse_d": 2,
  "genome": "hg19",
  "genome_label": "Homo sapiens (hg19)",
  "genome_description": "Genome Reference Consortium Human Build 37 (GRCh37/hg19) 2009/02/27",
  "genome_details": "https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.13/"
}

hg38

{
  "fasta": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg38/fasta/hg38.fa",
    "token": ""
  },
  "fasta_ribosomal": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg38/fasta/ribo.fa",
    "token": ""
  },
  "fasta_mitochondrial": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg38/fasta/chrM.fa",
    "token": ""
  },
  "input_annotation_gtf": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg38/annotation/refgene.gtf",
    "token": ""
  },
  "annotation_tab": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg38/annotation/refgene.tsv",
    "token": ""
  },
  "threads": 4,
  "limit_genome_generate_ram": 20000000000,
  "effective_genome_size": "2.74e9",
  "genome_sa_index_n_bases": 14,
  "genome_sa_index_n_bases_mitochondrial": 6,
  "genome_sa_sparse_d": 2,
  "genome": "hg38",
  "genome_label": "Homo sapiens (hg38)",
  "genome_description": "Genome Reference Consortium Human Build 38 (GRCh38/hg38) 2013/12/17",
  "genome_details": "https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26/"
}
michael-kotliar commented 4 years ago

After merging PR https://github.com/datirium/workflows/pull/48, the workflow will also generate the files required for the IGV plugin.

All input files except ribo.fa are now downloaded from the official UCSC download servers; ribo.fa is still served from scidap.com.

mm10

{
  "cytoband":{
    "location":"http://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/cytoBandIdeo.txt.gz",
    "token": ""
  },
  "genome_2bit_file": {
    "location": "ftp://hgdownload.cse.ucsc.edu/goldenPath/mm10/bigZips/mm10.2bit",
    "token": ""
  },
  "fasta_ribosomal": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/mm10/fasta/ribo.fa",
    "token": ""
  },
  "annotation_tab": {
    "location": "http://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/refGene.txt.gz",
    "token": ""
  },
  "mitochondrial_annotation_tab": {
    "location": "ftp://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/wgEncodeGencodeBasicVM18.txt.gz",
    "token": ""
  },
  "threads": 4,
  "limit_genome_generate_ram": 20000000000,
  "effective_genome_size": "2.4e9",
  "genome_sa_index_n_bases": 14,
  "genome_sa_index_n_bases_mitochondrial": 6,
  "genome_sa_sparse_d": 2,
  "genome": "mm10",
  "genome_label": "Mus musculus (mm10)",
  "genome_description": "Genome Reference Consortium Mouse Build 38 (mm10). Strain: C57BL/6J. 2012/01/09",
  "genome_details": "https://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.20/",
  "chromosome_list":["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chrX", "chrY", "chrM"]
}

hg19

{
  "cytoband":{
    "location":"http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBandIdeo.txt.gz",
    "token": ""
  },
  "genome_2bit_file": {
    "location": "ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/hg19.2bit",
    "token": ""
  },
  "fasta_ribosomal": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg19/fasta/ribo.fa",
    "token": ""
  },
  "annotation_tab": {
    "location": "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/refGene.txt.gz",
    "token": ""
  },
  "mitochondrial_annotation_tab": {
    "location": "ftp://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/wgEncodeGencodeBasicV19.txt.gz",
    "token": ""
  },
  "threads": 4,
  "limit_genome_generate_ram": 20000000000,
  "effective_genome_size": "2.73e9",
  "genome_sa_index_n_bases": 14,
  "genome_sa_index_n_bases_mitochondrial": 6,
  "genome_sa_sparse_d": 2,
  "genome": "hg19",
  "genome_label": "Homo sapiens (hg19)",
  "genome_description": "Genome Reference Consortium Human Build 37 (GRCh37/hg19) 2009/02/27",
  "genome_details": "https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.13/",
  "chromosome_list":["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY", "chrM"]
}

hg38

{
  "cytoband":{
    "location":"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBandIdeo.txt.gz",
    "token": ""
  },
  "genome_2bit_file": {
    "location": "ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit",
    "token": ""
  },
  "fasta_ribosomal": {
    "location": "http://scidap.com/downloads/genome_indices/inputs/hg38/fasta/ribo.fa",
    "token": ""
  },
  "annotation_tab": {
    "location": "http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/refGene.txt.gz",
    "token": ""
  },
  "mitochondrial_annotation_tab": {
    "location": "ftp://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/wgEncodeGencodeBasicV28.txt.gz",
    "token": ""
  },
  "threads": 4,
  "limit_genome_generate_ram": 20000000000,
  "effective_genome_size": "2.74e9",
  "genome_sa_index_n_bases": 14,
  "genome_sa_index_n_bases_mitochondrial": 6,
  "genome_sa_sparse_d": 2,
  "genome": "hg38",
  "genome_label": "Homo sapiens (hg38)",
  "genome_description": "Genome Reference Consortium Human Build 38 (GRCh38/hg38) 2013/12/17",
  "genome_details": "https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26/",
  "chromosome_list":["chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY", "chrM"]
}
portah commented 4 years ago

Implemented for now.