xie186 / miscellaneous_note

0 stars 0 forks source link

snpEff database build #16

Open xie186 opened 4 years ago

xie186 commented 4 years ago
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_genomic.fna.gz
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_genomic.gff.gz
xie186 commented 4 years ago
# snpEff/4.4
 module load bioinfo snpEff/4.4
xie186 commented 4 years ago
mkdir /depot/bioinfo/apps/apps/snpEff-4.3T/data/Amel_HAv3.1

wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_genomic.fna.gz
wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_genomic.gff.gz
mv GCF_003254395.2_Amel_HAv3.1_genomic.fna.gz sequences.fa.gz
mv GCF_003254395.2_Amel_HAv3.1_genomic.gff.gz genes.gff.gz
gunzip sequences.fa.gz
gunzip genes.gff.gz

wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_protein.faa.gz
gunzip GCF_003254395.2_Amel_HAv3.1_protein.faa.gz
mv GCF_003254395.2_Amel_HAv3.1_protein.faa /depot/bioinfo/apps/apps/snpEff-4.3T/data/Amel_HAv3.1/protein.fa
xie186 commented 4 years ago
cd /depot/bioinfo/apps/apps/snpEff-4.3T
java -jar snpEff.jar build -gff3 -v Amel_HAv3.1

test.log

xie186 commented 4 years ago
find  /depot/bioinfo/apps/apps/snpEff-4.3T  -name .dir_bash_history_xie186
/depot/bioinfo/apps/apps/snpEff-4.3T/.dir_bash_history_xie186
/depot/bioinfo/apps/apps/snpEff-4.3T/data/.dir_bash_history_xie186
/depot/bioinfo/apps/apps/snpEff-4.3T/data/Amel_HAv3.1/.dir_bash_history_xie186

rm

xie186 commented 4 years ago
java -jar /depot/bioinfo/apps/apps/snpEff-4.3T/snpEff.jar databases |less
java -jar /depot/bioinfo/apps/apps/snpEff-4.3T/snpEff.jar  Amel_HAv3.1 test.vcf > test.anno.test

test.vcf

#NC_037638.1     Gnomon  CDS     11201   11244 NC_037638.1     Gnomon  CDS     11201   11244   .       -       0       ID=cds-XP_623975.1;Parent=rna-XM_623972.6;Dbxref=GeneID:551580,Genbank:XP_623975.1,BEEBASE:GB42195;Name=XP_623975.1;gbkey=CDS;gene=LOC551580;product=ubiquitin-related modifier 1;protein_id=XP_623975.1
NC_037638.1     11201   rs111   C       T       29      PASS
NC_037638.1     9478    rs1111  T       A       29      PASS
xie186 commented 4 years ago
#NC_037638.1     Gnomon  CDS     11201   11244 NC_037638.1     Gnomon  CDS     11201   11244   .       -       0       ID=cds-XP_623975.1;Parent=rna-XM_623972.6;Dbxref=GeneID:551580,Genbank:XP_623975.1,BEEBASE:GB42195;Name=XP_623975.1;gbkey=CDS;gene=LOC551580;product=ubiquitin-related modifier 1;protein_id=XP_623975.1
##SnpEffVersion="4.3t (build 2017-11-24 10:18), by Pablo Cingolani"
##SnpEffCmd="SnpEff  Amel_HAv3.1 test.vcf "
##INFO=<ID=ANN,Number=.,Type=String,Description="Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO' ">
##INFO=<ID=LOF,Number=.,Type=String,Description="Predicted loss of function effects for this variant. Format: 'Gene_Name | Gene_ID | Number_of_transcripts_in_gene | Percent_of_transcripts_affected'">
##INFO=<ID=NMD,Number=.,Type=String,Description="Predicted nonsense mediated decay effects for this variant. Format: 'Gene_Name | Gene_ID | Number_of_transcripts_in_gene | Percent_of_transcripts_affected'">
NC_037638.1     11201   rs111   C       T       29.0    PASS    ANN=T|missense_variant&splice_region_variant|MODERATE|exon-XM_623972.6-1|GENE_exon-XM_623972.6-1|transcript|rna-XM_623972.6|protein_coding|1/5|c.44G>A|p.Gly15Glu|974/1447|44/312|15/103||,T|upstream_gene_variant|MODIFIER|LOC551555|gene-LOC551555|transcript|XM_006557404.3|protein_coding||c.-5979C>T|||||1081|WARNING_TRANSCRIPT_NO_START_CODON,T|upstream_gene_variant|MODIFIER|exon-XM_006557404.3-1|GENE_exon-XM_006557404.3-1|transcript|rna-XM_006557404.3|protein_coding||c.-1454C>T|||||1081|,T|upstream_gene_variant|MODIFIER|exon-XM_006557406.3-1|GENE_exon-XM_006557406.3-1|transcript|rna-XM_006557406.3|protein_coding||c.-1297C>T|||||1091|,T|upstream_gene_variant|MODIFIER|exon-XM_623949.6-1|GENE_exon-XM_623949.6-1|transcript|rna-XM_623949.6|protein_coding||c.-1297C>T|||||1091|,T|upstream_gene_variant|MODIFIER|LOC551555|gene-LOC551555|transcript|XM_006557406.3|protein_coding||c.-5979C>T|||||1091|WARNING_TRANSCRIPT_NO_START_CODON,T|upstream_gene_variant|MODIFIER|LOC551555|gene-LOC551555|transcript|XM_623949.6|protein_coding||c.-5979C>T|||||1091|WARNING_TRANSCRIPT_NO_START_CODON,T|intron_variant|MODIFIER|exon-XR_001705490.2-1|GENE_exon-XR_001705490.2-1|transcript|rna-XR_001705490.2|pseudogene|1/4|n.597-80G>A||||||,T|intron_variant|MODIFIER|exon-XR_001705491.2-1|GENE_exon-XR_001705491.2-1|transcript|rna-XR_001705491.2|pseudogene|1/4|n.364-80G>A||||||,T|intron_variant|MODIFIER|exon-XM_006557405.3-1|GENE_exon-XM_006557405.3-1|transcript|rna-XM_006557405.3|protein_coding|1/5|c.-15+133C>T||||||,T|non_coding_transcript_variant|MODIFIER|LOC551580|gene-LOC551580|transcript|XM_623972.6|protein_coding||||||||WARNING_TRANSCRIPT_NO_START_CODON,T|non_coding_transcript_variant|MODIFIER|LOC551580|gene-LOC551580|transcript|XR_001705490.2|pseudogene||||||||,T|non_coding_transcript_variant|MODIFIER|LOC551580|gene-LOC551580|transcript|XR_001705491.2|pseudogene||||||||,T|non_coding_transcript_variant|MODIFIER|LOC551555|gene-LOC551555|
transcript|XM_006557405.3|protein_coding||||||||WARNING_TRANSCRIPT_NO_START_CODON
NC_037638.1     9478    rs1111  T       A       29.0    PASS    ANN=A|stop_lost|HIGH|exon-XM_623972.6-1|GENE_exon-XM_623972.6-1|transcript|rna-XM_623972.6|protein_coding|5/5|c.312A>T|p.Ter104Tyrext*?|1242/1447|312/312|104/103||,A|upstream_gene_variant|MODIFIER|exon-XM_006557405.3-1|GENE_exon-XM_006557405.3-1|transcript|rna-XM_006557405.3|protein_coding||c.-3177T>A|||||1314|,A|upstream_gene_variant|MODIFIER|LOC551555|gene-LOC551555|transcript|XM_006557404.3|protein_coding||c.-7702T>A|||||2804|WARNING_TRANSCRIPT_NO_START_CODON,A|upstream_gene_variant|MODIFIER|exon-XM_006557404.3-1|GENE_exon-XM_006557404.3-1|transcript|rna-XM_006557404.3|protein_coding||c.-3177T>A|||||2804|,A|upstream_gene_variant|MODIFIER|exon-XM_006557406.3-1|GENE_exon-XM_006557406.3-1|transcript|rna-XM_006557406.3|protein_coding||c.-3020T>A|||||2814|,A|upstream_gene_variant|MODIFIER|exon-XM_623949.6-1|GENE_exon-XM_623949.6-1|transcript|rna-XM_623949.6|protein_coding||c.-3020T>A|||||2814|,A|upstream_gene_variant|MODIFIER|LOC551555|gene-LOC551555|transcript|XM_006557405.3|protein_coding||c.-7701T>A|||||1314|WARNING_TRANSCRIPT_NO_START_CODON,A|upstream_gene_variant|MODIFIER|LOC551555|gene-LOC551555|transcript|XM_006557406.3|protein_coding||c.-7702T>A|||||2814|WARNING_TRANSCRIPT_NO_START_CODON,A|upstream_gene_variant|MODIFIER|LOC551555|gene-LOC551555|transcript|XM_623949.6|protein_coding||c.-7702T>A|||||2814|WARNING_TRANSCRIPT_NO_START_CODON,A|non_coding_transcript_exon_variant|MODIFIER|exon-XR_001705490.2-1|GENE_exon-XR_001705490.2-1|transcript|rna-XR_001705490.2|pseudogene|5/5|n.864A>T||||||,A|non_coding_transcript_exon_variant|MODIFIER|exon-XR_001705491.2-1|GENE_exon-XR_001705491.2-1|transcript|rna-XR_001705491.2|pseudogene|5/5|n.631A>T||||||,A|non_coding_transcript_variant|MODIFIER|LOC551580|gene-LOC551580|transcript|XM_623972.6|protein_coding||||||||WARNING_TRANSCRIPT_NO_START_CODON,A|non_coding_transcript_variant|MODIFIER|LOC551580|gene-LOC551580|transcript|XR_001705490.2|pseudogene||||||||,A|non_
coding_transcript_variant|MODIFIER|LOC551580|gene-LOC551580|transcript|XR_001705491.2|pseudogene||||||||