Hi! I'm quite new to bioinformatics and I'm running into an issue using krisp_vcf where I get the following error:
(py311) Janehol@bcsumx01:~/krisp$ krisp_vcf krisp_metadata.csv Race_1.masked.fa --vcf merged_DNAs.vcf.gz --groups NA2 --out_csv krisp_clade2_diagnostic_variants.csv
Traceback (most recent call last):
File "/home/Janehol/data/miniconda3/envs/py311/bin/krisp_vcf", line 8, in <module>
sys.exit(main())
^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 1393, in main
run_all()
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 1383, in run_all
for result in report_diag_region(args.vcf, contig, groups, reference, args,
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 1193, in report_diag_region
for region in find_diag_region(variants, groups, reference, kwargs):
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 791, in find_diag_region
for region in windower:
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 204, in sliding_window
for index, variant in enumerate(variants):
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 196, in from_vcf
out = cls(var, groups, check_groups=False, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 162, in __init__
count_data = self._sample_counts(variant, self.groups,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 405, in _sample_counts
output = cls._subset_sample_counts(variant, samples,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 391, in _subset_sample_counts
is_good = {s: variant.samples[s]['DP'] is not None
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 393, in <dictcomp>
and variant.samples[s]['GQ'] is not None
File "pysam/libcbcf.pyx", line 3541, in pysam.libcbcf.VariantRecordSample.__getitem__
File "pysam/libcbcf.pyx", line 813, in pysam.libcbcf.bcf_format_get_value
KeyError: 'invalid FORMAT: GQ'
The VCF file I am using was previously filtered with a minGQ of 20. It contains some variant calls with GQ values and some calls with no GQ values (below is an example from one of the samples):
scaffold_1 1 . G . 111 . MQ0F=0;MQ=30;DP=8;DP4=8,0,0,0;ADF=3;ADR=0;AD=3;AN=14 GT:DP:SP:ADF:ADR:AD
Is the VCF file the issue or am I doing something wrong? Any advice would be greatly appreciated!
Hi! I'm quite new to bioinformatics and I'm running into an issue using krisp_vcf where I get the following error:
(py311) Janehol@bcsumx01:~/krisp$ krisp_vcf krisp_metadata.csv Race_1.masked.fa --vcf merged_DNAs.vcf.gz --groups NA2 --out_csv krisp_clade2_diagnostic_variants.csv
Traceback (most recent call last):
File "/home/Janehol/data/miniconda3/envs/py311/bin/krisp_vcf", line 8, in <module>
sys.exit(main())
^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 1393, in main
run_all()
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 1383, in run_all
for result in report_diag_region(args.vcf, contig, groups, reference, args,
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 1193, in report_diag_region
for region in find_diag_region(variants, groups, reference, kwargs):
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 791, in find_diag_region
for region in windower:
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/krisp_vcf.py", line 204, in sliding_window
for index, variant in enumerate(variants):
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 196, in from_vcf
out = cls(var, groups, check_groups=False, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 162, in __init__
count_data = self._sample_counts(variant, self.groups,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 405, in _sample_counts
output = cls._subset_sample_counts(variant, samples,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 391, in _subset_sample_counts
is_good = {s: variant.samples[s]['DP'] is not None
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/Janehol/genomics/2-MISC-BY-USER/krisp/src/krisp/krisp_vcf/find_diag_var.py", line 393, in <dictcomp>
and variant.samples[s]['GQ'] is not None