clingen-data-model / clinvar-curation-reporting

@todo @Danielle
0 stars 0 forks source link

Create a one-off report containing all Functional Data in clinvar #15

Closed larrybabb closed 11 months ago

larrybabb commented 11 months ago

per the Dec 8, 2023 Clingen clinvar cuartion stakeholder meeting the request to identify all functional data in clinvar arose. I committed to putting together a basic report that would identify the functional data records and the scvs/variants they are associated with in a google sheet on the most recent release at the time I put the report together.

It will be distributed on the clinvar-curation slack channel.

larrybabb commented 11 months ago

done https://docs.google.com/spreadsheets/d/158uxmlcPaV5Olc5xQIKNQkC5654hJrq-SBQ2pLbUNEY/edit#gid=1844858884

The script for this is in the clingen_stage classic project querries under the name extract-all_variant_functional_consquence_data

CREATE TEMP FUNCTION parseFunctionalConsequence(json STRING)
RETURNS ARRAY<STRUCT<type STRUCT<label STRING, db STRING, id STRING, url STRING>, comment STRING, dataSource STRING, visibility STRING>>
LANGUAGE js AS """
  const data = JSON.parse(json);
  let functionalConsequence = data.FunctionalConsequence;

  /* if not an array, make it into a single element array */
  if (!Array.isArray(functionalConsequence)) {
    functionalConsequence = [functionalConsequence];
  }

  return functionalConsequence.map(item => ({
    type : {
      'label': (item['@Value']?item['@Value']:null),
      'db'   : (item.XRef && item.XRef['@DB'] ? item.XRef['@DB'] : null),
      'id'   : (item.XRef && item.XRef['@ID'] ? item.XRef['@ID'] : null),
      'url'   : (item.XRef && item.XRef['@URL'] ? item.XRef['@URL'] : null)
    },
    comment    : (item.Comment && item.Comment['$']? item.Comment['$'] : null),
    dataSource : (item.Comment && item.Comment['@DataSource'] ? item.Comment['@DataSource'] : null),
    visibility : (item.Comment && item.Comment['@Type'] ? item.Comment['@Type'] : null)
  }));
""";

CREATE TEMP FUNCTION parseMethod(json STRING)
RETURNS ARRAY<STRUCT<type STRING, description STRING, attrib STRING, attrib_type STRING, platform STRING, citations ARRAY<STRUCT<id STRING, source STRING>>, citations_str STRING>>
LANGUAGE js AS """
  function parseCitation(citation) {

    /* if not an array, make it into a single element array */
    if (!Array.isArray(citation)) {
      citation = [citation];
    }

    return citation.map(item => ({
      id     : (item.ID && item.ID['$'] ? item.ID['$'] : null),
      source : (item.ID && item.ID['@Source'] ? item.ID['@Source'] : null)
    }));
  }

  const data = JSON.parse(json);
  let method = data.Method;

  /* if not an array, make it into a single element array */
  if (!Array.isArray(method)) {
    method = [method];
  }

  return method.map(item => ({
    type        : (item.MethodType && item.MethodType['$'] ? item.MethodType['$']:null),
    description : (item.Description && item.Description['$'] ? item.Description['$'] : null),
    attrib      : (item.ObsMethodAttribute && item.ObsMethodAttribute.Attribute && item.ObsMethodAttribute.Attribute['$'] ? item.ObsMethodAttribute.Attribute['$'] : null),
    attrib_type : (item.ObsMethodAttribute && item.ObsMethodAttribute.Attribute && item.ObsMethodAttribute.Attribute['@Type'] ? item.ObsMethodAttribute.Attribute['@Type'] : null),
    platform    : (item.TypePlatform && item.TypePlatform['$'] ? item.TypePlatform['$'] : null),
    citations   : (item.Citation ?  parseCitation(item.Citation) : null),
    citations_str : (item.Citation ?  "["+parseCitation(item.Citation).map(obj => obj.source+":"+obj.id).join(";")+"]" : null)
  }));
""";
WITH x AS
(
  select 
    vcv.id||'.'||vcv.version as vcv_id,
    v.id as variation_id,
    v.name as variation_name,
    ca.id||'.'||ca.version as scv_id,
    ca.review_status as scv_review_status,
    ca.submitter_id,
    s.current_name as submitter_name,
    parseMethod(cao.content) as method,
    parseFunctionalConsequence(cav.content) as functionalConsequence
  from `clingen-stage.clinvar_2023_12_17_v1_6_61.clinical_assertion` ca
  cross join unnest(ca.clinical_assertion_observation_ids) as cao_id

  join `clingen-stage.clinvar_2023_12_17_v1_6_61.clinical_assertion_variation` cav
  on
    cav.clinical_assertion_id = ca.id
  join `clingen-stage.clinvar_2023_12_17_v1_6_61.variation_archive` vcv
  on
    vcv.id = ca.variation_archive_id
  join `clingen-stage.clinvar_2023_12_17_v1_6_61.variation` v
  on
    v.id = ca.variation_id
  join `clingen-stage.clinvar_2023_12_17_v1_6_61.submitter` s
  on
    s.id = ca.submitter_id
  join `clingen-stage.clinvar_2023_12_17_v1_6_61.clinical_assertion_observation` cao
  on 
    cao.id = cao_id
  where cav.content like '%FunctionalConsequence%'
)
select 
  x.vcv_id,
  x.variation_id,
  x.variation_name,
  x.scv_id,
  x.scv_review_status,
  x.submitter_id,
  x.submitter_name,
  STRING_AGG(FORMAT('- ( %s ) %s\n\tResult: %s %s', ifnull(m.type, 'not provided'), ifnull(m.description,'not provided'), ifnull(m.attrib,'not provided'), ifnull(m.citations_str,'')), '\n') as method,
  STRING_AGG(DISTINCT FORMAT('- %s [%s%s]\n\tComment: %s', ifnull(f.type.label,'not provided'), ifnull(f.type.db||' ', ''), if(f.type.id is not null, if(f.type.db = "Variation Ontology", "VariO:"||f.type.id, f.type.id), 'not provided'), ifnull(f.comment,'not provided')), '\n') as funcConsq
from x
cross join unnest (x.method) as m
cross join unnest (x.functionalConsequence) as f

group by 
  x.vcv_id,
  x.variation_id,
  x.variation_name,
  x.scv_id,
  x.scv_review_status,
  x.submitter_id,
  x.submitter_name
;