Open loipf opened 2 years ago
updated version:
### created filtered vcf
/home/loipf/Documents/tools/bcftools/bin/bcftools view -O z -o $sample_id"_somatic_combiner_PASS.vcf.gz" -f PASS $sample_id"_somatic_combiner_all.vcf.gz"
tabix -p vcf $sample_id"_somatic_combiner_PASS.vcf.gz"
gunzip -c $sample_id"_somatic_combiner_PASS.vcf.gz" > $sample_id"_somatic_combiner_PASS.vcf"
vcf2maf.pl --input-vcf $sample_id"_somatic_combiner_PASS.vcf" --output-maf $sample_id"_somatic_combiner_PASS.vep.maf" --tumor-id $sample_id"_tumor" --normal-id $sample_id"_normal" --ref-fasta ~/.vep/homo_sapiens/101_GRCh38/Homo_sapiens.GRCh38.dna.toplevel.fa.gz --vep-path ~/miniconda3/envs/loipf_vep/bin --ncbi-build GRCh38 --vep-overwrite --retain-info Tumor_AF,Tumor_DP,NumCallers,lLsSMD,Strelka_MQ,Mutect2_MMQ,Lofreq_AF,Lofreq_QUAL --retain-fmt DP,AD,GT,Vardict_MQ,Vardict_AD,Strelka_AU,Strelka_CU,Strelka_GU,Strelka_TU --max-subpop-af 0.01
rm $sample_id"_somatic_combiner_PASS.vcf"
post-processing:
############################################
### pre-edit maf file with filling missing counts
WES_MAF_FILE = file.path(EXTERNAL_STORAGE_DIRECTORY, "metastasis_wes/vc_caller/all_samples_snps_indels_PASS_reads_missing.vep.maf")
wes_df = fread(WES_MAF_FILE)
### strelka + lofreq doesnt fill read counts automatically
for(curr_row_num in 1:nrow(wes_df)) {
# print(curr_row_num)
# curr_row_num = 4
curr_row = wes_df[curr_row_num,]
if(is.na(curr_row$t_ref_count)) {
curr_row$t_ref_count = strsplit(curr_row[[paste0('t_Strelka_',curr_row$Tumor_Seq_Allele1,'U')]], ',')[[1]][1]
curr_row$t_alt_count = strsplit(curr_row[[paste0('t_Strelka_',curr_row$Tumor_Seq_Allele2,'U')]], ',')[[1]][1]
curr_row$n_ref_count = strsplit(curr_row[[paste0('n_Strelka_',curr_row$Match_Norm_Seq_Allele1,'U')]], ',')[[1]][1]
if(curr_row$Match_Norm_Seq_Allele1 == curr_row$Match_Norm_Seq_Allele2) {
curr_row$n_alt_count = 0
} else {
curr_row$n_alt_count = strsplit(curr_row[[paste0('n_Strelka_',curr_row$Match_Norm_Seq_Allele2,'U')]], ',')[[1]][1]
}
wes_df[curr_row_num,] = curr_row
}
}
output_file = file.path(dirname(file.path(WES_MAF_FILE)),"all_samples_snps_indels_PASS.vep.maf")
# cat("#version 2.4\n", file = output_file)
cat(paste0("#version 2.4\n", paste0(colnames(wes_df), collapse = '\t'), "\n"), file = output_file)
fwrite(wes_df, output_file, sep = '\t', append = T, quote=F)
improve pipeline to automatically filter and create
.maf
.vcf
files and create.maf
somatic combiner defined but not added to vcf
sed -i '7 i ##FORMAT=' somatic_combiner_sample.vcf
rm somatic_combiner_raw.vcf
/tools/bcftools/bin/bcftools stats -f PASS -s $sample_id"_normal" --threads 5 somatic_combiner_sample.vcf > $sample_id"_normal_PASS_vcfstats.txt" /tools/bcftools/bin/bcftools stats -f PASS -s $sample_id"_tumor" --threads 5 somatic_combiner_sample.vcf > $sample_id"_tumor_PASS_vcfstats.txt"
only snps
/tools/bcftools/bin/bcftools view --types snps -f PASS somatic_combiner_sample.vcf > somatic_combiner_sample_snps_only_filtered.vcf /tools/bcftools/bin/bcftools stats somatic_combiner_sample_snps_only_filtered.vcf > somatic_combiner_sample_snps_only_filtered_vcfstats.txt
only indels
/tools/bcftools/bin/bcftools view --exclude-types snps -f PASS somatic_combiner_sample.vcf > somatic_combiner_sample_indels_only_filtered.vcf /tools/bcftools/bin/bcftools stats somatic_combiner_sample_indels_only_filtered.vcf > somatic_combiner_sample_indels_only_filtered_vcfstats.txt
vcf2maf.pl --input-vcf somatic_combiner_sample_snps_only_filtered.vcf --output-maf somatic_combiner_sample_snps_only_filtered.vep.maf --tumor-id $sample_id"_tumor" --normal-id $sample_id"_normal" --ref-fasta ~/.vep/homo_sapiens/101_GRCh38/Homo_sapiens.GRCh38.dna.toplevel.fa.gz --vep-path ~/miniconda3/envs/loipf_vep/bin --ncbi-build GRCh38
vcf2maf.pl --input-vcf somatic_combiner_sample_indels_only_filtered.vcf --output-maf somatic_combiner_sample_indels_only_filtered.vep.maf --tumor-id $sample_id"_tumor" --normal-id $sample_id"_normal" --ref-fasta ~/.vep/homo_sapiens/101_GRCh38/Homo_sapiens.GRCh38.dna.toplevel.fa.gz --vep-path ~/miniconda3/envs/loipf_vep/bin --ncbi-build GRCh38
only snps
cat $OUTPUT_DIR//somatic_combiner_sample_indels_only_filtered.vep.maf | egrep "^#|^Hugo_Symbol" | head -2 > all_samples_indels_filtered.vep.maf cat $OUTPUT_DIR//somatic_combiner_sample_indels_only_filtered.vep.maf | egrep -v "^#|^Hugo_Symbol" >> all_samples_indels_filtered.vep.maf
only indels
cat $OUTPUT_DIR//somatic_combiner_sample_snps_only_filtered.vep.maf | egrep "^#|^Hugo_Symbol" | head -2 > all_samples_snps_filtered.vep.maf cat $OUTPUT_DIR//somatic_combiner_sample_snps_only_filtered.vep.maf | egrep -v "^#|^Hugo_Symbol" >> all_samples_snps_filtered.vep.maf