Specifically its not incrementing the alternate_allele_count when genotype 2 corresponds to the allele in question.
Here's our result:
SELECT
contig_name,
start_pos,
reference_bases,
alternate_bases,
alternate_allele_count
FROM
[google.com:biggene:pgp_analysis_results.gvcf_variants_allele_counts]
WHERE
contig_name = '17'
AND start_pos = 41203325
AND reference_bases = 'TG'
but compare the output of
SELECT
contig_name,
start_pos,
end_pos,
reference_bases,
GROUP_CONCAT(alternate_bases) WITHIN RECORD AS alt,
SVTYPE,
END,
GROUP_CONCAT(call.callset_name) WITHIN call as name,
GROUP_CONCAT(STRING(call.genotype)) WITHIN call AS gt
FROM
[google.com:biggene:pgp.gvcf_variants]
WHERE
contig_name = '17'
AND start_pos = 41203325
AND reference_bases = 'TG'
to
SELECT contig_name, start_pos, reference_bases, alternate_bases, alternate_allele_count
FROM js(
[google.com:biggene:pgp.gvcf_variants],
contig_name, start_pos, reference_bases, alternate_bases, call.genotype,
"[{name: 'contig_name', type: 'string'},
{name: 'start_pos', type: 'integer'},
{name: 'reference_bases', type: 'string'},
{name: 'alternate_bases', type: 'string'},
{name: 'alternate_allele_count', type: 'integer'}]",
"function(r, emit) {
for(var a in r.alternate_bases) {
var alt_gt = a + 1;
var alt_count = 0;
for(var c in r.call) {
for(var g in r.call[c].genotype) {
if(alt_gt == r.call[c].genotype[g]) {
alt_count++;
}
}
}
// Emit one record per alt
emit({
contig_name: r.contig_name,
start_pos: r.start_pos,
reference_bases: r.reference_bases,
alternate_bases: r.alternate_bases[a],
alternate_allele_count: alt_count
});
}
}")
WHERE
contig_name = '17'
AND start_pos = 41203325 AND reference_bases = 'TG'
There appears to be a bug in https://github.com/googlegenomics/bigquery-examples/blob/master/pgp/sql/gvcf_variants/allele-count.sql
Specifically its not incrementing the alternate_allele_count when genotype 2 corresponds to the allele in question.
Here's our result:
but compare the output of
to