One thing to note is how the query is sorting the results, by CHROM, pos, etc.:
```sql
-- def _get_variants(target: str):
select
array_agg(distinct iv.zygosity order by iv.zygosity) as zigosity,
array_agg(distinct concat(g.hgnc_symbol,'@',g.ensembl_gene_id)) as genes, -- to split
v.chrom as "CHROM", v.pos as "POS", v."ref" as "REF", v.alt as "ALT", v.cadd_phred, v.dann,
v.fathmm_score, v.revel, -- new added
-- removed: v.id
vg.most_severe_consequence, string_agg(distinct vg.hgvs_c,',' order by vg.hgvs_c) as hgvsc,
string_agg(distinct vg.hgvs_p,',' order by vg.hgvs_p) as hgvsp, -- via variant_gene
iv.dp as "DP", iv."fs" as "FS", iv.mq as "MQ", iv."filter" as "FILTER", -- via individual_variant
(
select array_agg(i.phenopolis_id order by i.id)
from phenopolis.individual i
join phenopolis.individual_variant iv2 on iv2.individual_id = i.id and iv2.zygosity = 'HOM'
where v.id = iv2.variant_id
) as "HOM",
(
select array_agg(i.phenopolis_id order by i.id)
from phenopolis.individual i
join phenopolis.individual_variant iv2 on iv2.individual_id = i.id and iv2.zygosity = 'HET'
where v.id = iv2.variant_id
) as "HET",
(
select distinct on (ah.chrom,ah.pos,ah."ref",ah.alt) ah.af from kaviar.annotation_hg19 ah
where ah.chrom = v.chrom and ah.pos = v.pos and ah."ref" = v."ref" and ah.alt = v.alt
order by ah.chrom,ah.pos,ah."ref",ah.alt,ah.ac desc
) as af_kaviar,
av.af as af_gnomad_genomes -- gnomad # NOTE: missing strand?
-- deprecated: MLEAF, MLEAC
-- need to be added (by Daniele): af_converge, af_hgvd, af_jirdc, af_krgdb, af_tommo,
from phenopolis.variant v
join phenopolis.individual_variant iv on iv.variant_id = v.id
join phenopolis.individual i2 on i2.id = iv.individual_id
left outer join phenopolis.variant_gene vg on vg.variant_id = v.id -- variant_gene not complete?
left outer join ensembl.gene g on vg.gene_id = g.ensembl_gene_id
and g.assembly = 'GRCh37' and g.chromosome ~ '^X|^Y|^[0-9]{1,2}'
left outer join gnomad.annotation_v3 av
on av.chrom = v.chrom and av.pos = v.pos and av."ref" = v."ref" and av.alt = v.alt
--where v.chrom = '12' and v.pos = 7241974 and v."ref" = 'C' and v.alt = 'T' -- 2 rows
--where v.chrom = '7' and v.pos = 2303057 and v."ref" = 'G' and v.alt = 'A' -- 1 row
--where i2.phenopolis_id = 'PH00008256'
--where vg.gene_id = 'ENSG00000144285'
where i2.phenopolis_id = 'PH00008697'
group by "CHROM","POS","REF","ALT",cadd_phred,dann,fathmm_score,revel,most_severe_consequence,
"DP","FS","MQ","FILTER", -- need for array_agg but disambiguates depending on individual_variant
"HOM","HET",af_kaviar,af_gnomad_genomes
order by
substring(v.chrom FROM '([0-9]+)')::int,
v.pos, v."ref", v.alt, iv.dp desc
limit 10000 offset 0 -- attempt to paginate
;
```
In this way, zigosityHET and HOM can be alternated in rows, so individual page, which has 3 tabs: RARE HOMOZYGOTES, RARE COMPOUND HETEROZYGOTES AND RARE HETEROZYGOTES, may not paginate as desired.
I'm investigating a better solution. Perhaps pagination could be better done in def _individual_complete_view(...) rather than in the query above since it's there where rare_comp_hets are processed.
Spin off from #342.
Query in
variant.py
:def _get_variants(target: str):
need to have pagination since some patients, like http://dev-live.phenopolis.org/individual/PH00008697 has tens of thousands of variants.One thing to note is how the query is sorting the results, by CHROM, pos, etc.:
```sql -- def _get_variants(target: str): select array_agg(distinct iv.zygosity order by iv.zygosity) as zigosity, array_agg(distinct concat(g.hgnc_symbol,'@',g.ensembl_gene_id)) as genes, -- to split v.chrom as "CHROM", v.pos as "POS", v."ref" as "REF", v.alt as "ALT", v.cadd_phred, v.dann, v.fathmm_score, v.revel, -- new added -- removed: v.id vg.most_severe_consequence, string_agg(distinct vg.hgvs_c,',' order by vg.hgvs_c) as hgvsc, string_agg(distinct vg.hgvs_p,',' order by vg.hgvs_p) as hgvsp, -- via variant_gene iv.dp as "DP", iv."fs" as "FS", iv.mq as "MQ", iv."filter" as "FILTER", -- via individual_variant ( select array_agg(i.phenopolis_id order by i.id) from phenopolis.individual i join phenopolis.individual_variant iv2 on iv2.individual_id = i.id and iv2.zygosity = 'HOM' where v.id = iv2.variant_id ) as "HOM", ( select array_agg(i.phenopolis_id order by i.id) from phenopolis.individual i join phenopolis.individual_variant iv2 on iv2.individual_id = i.id and iv2.zygosity = 'HET' where v.id = iv2.variant_id ) as "HET", ( select distinct on (ah.chrom,ah.pos,ah."ref",ah.alt) ah.af from kaviar.annotation_hg19 ah where ah.chrom = v.chrom and ah.pos = v.pos and ah."ref" = v."ref" and ah.alt = v.alt order by ah.chrom,ah.pos,ah."ref",ah.alt,ah.ac desc ) as af_kaviar, av.af as af_gnomad_genomes -- gnomad # NOTE: missing strand? -- deprecated: MLEAF, MLEAC -- need to be added (by Daniele): af_converge, af_hgvd, af_jirdc, af_krgdb, af_tommo, from phenopolis.variant v join phenopolis.individual_variant iv on iv.variant_id = v.id join phenopolis.individual i2 on i2.id = iv.individual_id left outer join phenopolis.variant_gene vg on vg.variant_id = v.id -- variant_gene not complete? left outer join ensembl.gene g on vg.gene_id = g.ensembl_gene_id and g.assembly = 'GRCh37' and g.chromosome ~ '^X|^Y|^[0-9]{1,2}' left outer join gnomad.annotation_v3 av on av.chrom = v.chrom and av.pos = v.pos and av."ref" = v."ref" and av.alt = v.alt --where v.chrom = '12' and v.pos = 7241974 and v."ref" = 'C' and v.alt = 'T' -- 2 rows --where v.chrom = '7' and v.pos = 2303057 and v."ref" = 'G' and v.alt = 'A' -- 1 row --where i2.phenopolis_id = 'PH00008256' --where vg.gene_id = 'ENSG00000144285' where i2.phenopolis_id = 'PH00008697' group by "CHROM","POS","REF","ALT",cadd_phred,dann,fathmm_score,revel,most_severe_consequence, "DP","FS","MQ","FILTER", -- need for array_agg but disambiguates depending on individual_variant "HOM","HET",af_kaviar,af_gnomad_genomes order by substring(v.chrom FROM '([0-9]+)')::int, v.pos, v."ref", v.alt, iv.dp desc limit 10000 offset 0 -- attempt to paginate ; ```In this way,
zigosity
HET
andHOM
can be alternated in rows, soindividual
page, which has 3 tabs:RARE HOMOZYGOTES
,RARE COMPOUND HETEROZYGOTES
ANDRARE HETEROZYGOTES
, may not paginate as desired.I'm investigating a better solution. Perhaps pagination could be better done in
def _individual_complete_view(...)
rather than in the query above since it's there whererare_comp_hets
are processed.