Closed larrybabb closed 11 months ago
The script for this is in the clingen_stage classic project querries under the name extract-all_variant_functional_consquence_data
CREATE TEMP FUNCTION parseFunctionalConsequence(json STRING)
RETURNS ARRAY<STRUCT<type STRUCT<label STRING, db STRING, id STRING, url STRING>, comment STRING, dataSource STRING, visibility STRING>>
LANGUAGE js AS """
const data = JSON.parse(json);
let functionalConsequence = data.FunctionalConsequence;
/* if not an array, make it into a single element array */
if (!Array.isArray(functionalConsequence)) {
functionalConsequence = [functionalConsequence];
}
return functionalConsequence.map(item => ({
type : {
'label': (item['@Value']?item['@Value']:null),
'db' : (item.XRef && item.XRef['@DB'] ? item.XRef['@DB'] : null),
'id' : (item.XRef && item.XRef['@ID'] ? item.XRef['@ID'] : null),
'url' : (item.XRef && item.XRef['@URL'] ? item.XRef['@URL'] : null)
},
comment : (item.Comment && item.Comment['$']? item.Comment['$'] : null),
dataSource : (item.Comment && item.Comment['@DataSource'] ? item.Comment['@DataSource'] : null),
visibility : (item.Comment && item.Comment['@Type'] ? item.Comment['@Type'] : null)
}));
""";
CREATE TEMP FUNCTION parseMethod(json STRING)
RETURNS ARRAY<STRUCT<type STRING, description STRING, attrib STRING, attrib_type STRING, platform STRING, citations ARRAY<STRUCT<id STRING, source STRING>>, citations_str STRING>>
LANGUAGE js AS """
function parseCitation(citation) {
/* if not an array, make it into a single element array */
if (!Array.isArray(citation)) {
citation = [citation];
}
return citation.map(item => ({
id : (item.ID && item.ID['$'] ? item.ID['$'] : null),
source : (item.ID && item.ID['@Source'] ? item.ID['@Source'] : null)
}));
}
const data = JSON.parse(json);
let method = data.Method;
/* if not an array, make it into a single element array */
if (!Array.isArray(method)) {
method = [method];
}
return method.map(item => ({
type : (item.MethodType && item.MethodType['$'] ? item.MethodType['$']:null),
description : (item.Description && item.Description['$'] ? item.Description['$'] : null),
attrib : (item.ObsMethodAttribute && item.ObsMethodAttribute.Attribute && item.ObsMethodAttribute.Attribute['$'] ? item.ObsMethodAttribute.Attribute['$'] : null),
attrib_type : (item.ObsMethodAttribute && item.ObsMethodAttribute.Attribute && item.ObsMethodAttribute.Attribute['@Type'] ? item.ObsMethodAttribute.Attribute['@Type'] : null),
platform : (item.TypePlatform && item.TypePlatform['$'] ? item.TypePlatform['$'] : null),
citations : (item.Citation ? parseCitation(item.Citation) : null),
citations_str : (item.Citation ? "["+parseCitation(item.Citation).map(obj => obj.source+":"+obj.id).join(";")+"]" : null)
}));
""";
WITH x AS
(
select
vcv.id||'.'||vcv.version as vcv_id,
v.id as variation_id,
v.name as variation_name,
ca.id||'.'||ca.version as scv_id,
ca.review_status as scv_review_status,
ca.submitter_id,
s.current_name as submitter_name,
parseMethod(cao.content) as method,
parseFunctionalConsequence(cav.content) as functionalConsequence
from `clingen-stage.clinvar_2023_12_17_v1_6_61.clinical_assertion` ca
cross join unnest(ca.clinical_assertion_observation_ids) as cao_id
join `clingen-stage.clinvar_2023_12_17_v1_6_61.clinical_assertion_variation` cav
on
cav.clinical_assertion_id = ca.id
join `clingen-stage.clinvar_2023_12_17_v1_6_61.variation_archive` vcv
on
vcv.id = ca.variation_archive_id
join `clingen-stage.clinvar_2023_12_17_v1_6_61.variation` v
on
v.id = ca.variation_id
join `clingen-stage.clinvar_2023_12_17_v1_6_61.submitter` s
on
s.id = ca.submitter_id
join `clingen-stage.clinvar_2023_12_17_v1_6_61.clinical_assertion_observation` cao
on
cao.id = cao_id
where cav.content like '%FunctionalConsequence%'
)
select
x.vcv_id,
x.variation_id,
x.variation_name,
x.scv_id,
x.scv_review_status,
x.submitter_id,
x.submitter_name,
STRING_AGG(FORMAT('- ( %s ) %s\n\tResult: %s %s', ifnull(m.type, 'not provided'), ifnull(m.description,'not provided'), ifnull(m.attrib,'not provided'), ifnull(m.citations_str,'')), '\n') as method,
STRING_AGG(DISTINCT FORMAT('- %s [%s%s]\n\tComment: %s', ifnull(f.type.label,'not provided'), ifnull(f.type.db||' ', ''), if(f.type.id is not null, if(f.type.db = "Variation Ontology", "VariO:"||f.type.id, f.type.id), 'not provided'), ifnull(f.comment,'not provided')), '\n') as funcConsq
from x
cross join unnest (x.method) as m
cross join unnest (x.functionalConsequence) as f
group by
x.vcv_id,
x.variation_id,
x.variation_name,
x.scv_id,
x.scv_review_status,
x.submitter_id,
x.submitter_name
;
per the Dec 8, 2023 Clingen clinvar cuartion stakeholder meeting the request to identify all functional data in clinvar arose. I committed to putting together a basic report that would identify the functional data records and the scvs/variants they are associated with in a google sheet on the most recent release at the time I put the report together.
It will be distributed on the clinvar-curation slack channel.