callahantiff / PheKnowVec

Translational Computational Phenotyping
2 stars 0 forks source link

Project Meeting -- 06/18/2019 @ 16:00 #96

Closed callahantiff closed 5 years ago

callahantiff commented 5 years ago

Meeting Date: 06/18/2019 Topic: Phenotype Queries Attendees: @mgkahn

Proposed Agenda:

POST-MEETING UPDATE:

  1. Thank you for providing the information on CTEs, I love that approach! The updated query is included below.
  2. The phenotype definition criteria in the Wiki have also been updated and maps onto the CTEs in the query;
  3. I have not included the covariate variables in the table yet, focusing first on identifying the cohorts.
WITH age_criteria_1 AS (
SELECT DISTINCT co.person_id, cohort.standard_code_set AS code_set 
FROM 
  CHCO_DeID_Oct2018.person p, 
  CHCO_DeID_Oct2018.condition_occurrence co, 
  CHCO_DeID_Oct2018.ADHD_COHORT_VARIABLES cohort 
WHERE p.person_id = co.person_id 
  AND co.condition_concept_id = CAST(cohort.standard_concept_id AS int64) 
  AND cohort.phenotype_definition_number = 1 
GROUP BY
  co.person_id, p.birth_datetime, cohort.standard_code_set 
HAVING
  DATETIME_DIFF(DATETIME(MIN(co.condition_start_datetime)), DATETIME(p.birth_datetime), DAY) >= 1460
),

dx_case_inclusion_criteria_1 AS (
SELECT DISTINCT co.person_id, cohort.standard_code_set AS code_set 
FROM 
  CHCO_DeID_Oct2018.person p, 
  CHCO_DeID_Oct2018.condition_occurrence co, 
  CHCO_DeID_Oct2018.ADHD_COHORT_VARIABLES cohort, 
  CHCO_DeID_Oct2018.visit_occurrence v 
WHERE 
  p.person_id = co.person_id 
  AND co.visit_occurrence_id = v.visit_occurrence_id 
  AND co.condition_concept_id = CAST(cohort.standard_concept_id AS int64) 
  AND v.admitting_source_concept_id IN (44814672, 8870, 38004279, 8863, 8546) 
  AND cohort.phenotype_definition_number = 1 
GROUP BY 
  co.person_id, cohort.standard_code_set 
HAVING 
  COUNT(DISTINCT co.condition_concept_id) >= 1 
  AND COUNT(DISTINCT v.visit_start_date) > 1
),

rx_case_inclusion_criteria_1 AS (
SELECT DISTINCT de.person_id, cohort.standard_code_set AS code_set 
FROM 
  CHCO_DeID_Oct2018.drug_exposure de, 
  CHCO_DeID_Oct2018.ADHD_COHORT_VARIABLES cohort 
WHERE 
  de.drug_concept_id = CAST(cohort.standard_concept_id AS int64) 
  AND cohort.phenotype_definition_number = 2 
GROUP BY 
  de.person_id, cohort.standard_code_set 
HAVING 
  COUNT(DISTINCT de.drug_concept_id) >= 1
),

dx_case_exclusion_criteria_1 AS (
SELECT DISTINCT co.person_id, cohort.standard_code_set AS code_set
FROM 
  CHCO_DeID_Oct2018.condition_occurrence co,
  CHCO_DeID_Oct2018.ADHD_COHORT_VARIABLES cohort
WHERE 
  co.condition_concept_id = CAST(cohort.standard_concept_id AS int64)
  AND cohort.phenotype_definition_number = 3
GROUP BY co.person_id, cohort.standard_code_set
),

dx_case_inclusion_criteria_2 AS (
SELECT DISTINCT co.person_id, cohort.standard_code_set AS code_set 
FROM 
  CHCO_DeID_Oct2018.person p, 
  CHCO_DeID_Oct2018.condition_occurrence co, 
  CHCO_DeID_Oct2018.ADHD_COHORT_VARIABLES cohort, 
  CHCO_DeID_Oct2018.visit_occurrence v 
WHERE 
  p.person_id = co.person_id 
  AND co.visit_occurrence_id = v.visit_occurrence_id 
  AND co.condition_concept_id = CAST(cohort.standard_concept_id AS int64) 
  AND v.admitting_source_concept_id IN (44814672, 8870, 38004279, 8863, 8546) 
  AND cohort.phenotype_definition_number = 1 
GROUP BY 
  co.person_id, cohort.standard_code_set 
HAVING 
  COUNT(DISTINCT co.condition_concept_id) >= 1 
  AND COUNT(DISTINCT v.visit_start_date) > 2
),

age_criteria_2 AS (
SELECT DISTINCT co.person_id 
FROM 
  CHCO_DeID_Oct2018.person p, 
  CHCO_DeID_Oct2018.condition_occurrence co
  WHERE p.person_id = co.person_id 
GROUP BY
  co.person_id, p.birth_datetime
HAVING
  DATETIME_DIFF(DATETIME(MIN(co.condition_start_datetime)), DATETIME(p.birth_datetime), DAY) >= 1460
),

visit_criteria_1 AS (
SELECT DISTINCT v.person_id 
FROM 
  CHCO_DeID_Oct2018.visit_occurrence v 
GROUP BY 
  v.person_id, v.visit_end_datetime 
HAVING
  DATETIME(v.visit_end_datetime) >= DATETIME_ADD(DATETIME(MAX(v.visit_end_datetime)), INTERVAL -5 YEAR) 
),

rx_control_exclusion_criteria_1 AS (
SELECT DISTINCT de.person_id, cohort.standard_code_set AS code_set
FROM 
  CHCO_DeID_Oct2018.drug_exposure de,
  CHCO_DeID_Oct2018.ADHD_COHORT_VARIABLES cohort
WHERE 
  de.drug_concept_id = CAST(cohort.standard_concept_id AS int64)
  AND cohort.phenotype_definition_number = 4
  AND de.person_id IN (
   SELECT person_id FROM age_criteria_2
    INTERSECT DISTINCT 
   SELECT person_id FROM visit_criteria_1)
GROUP BY de.person_id, cohort.standard_code_set
),

dx_control_exclusion_criteria_1 AS (
SELECT DISTINCT co.person_id, cohort.standard_code_set AS code_set
FROM 
  CHCO_DeID_Oct2018.condition_occurrence co,
  CHCO_DeID_Oct2018.ADHD_COHORT_VARIABLES cohort
WHERE 
  co.condition_concept_id = CAST(cohort.standard_concept_id AS int64)
  AND cohort.phenotype_definition_number = 5
  AND co.person_id IN (
   SELECT person_id FROM age_criteria_2
    INTERSECT DISTINCT 
   SELECT person_id FROM visit_criteria_1)
GROUP BY co.person_id, cohort.standard_code_set
),

dx_control_exclusion_criteria_2 AS (
SELECT DISTINCT co.person_id, cohort.standard_code_set AS code_set
FROM 
  CHCO_DeID_Oct2018.condition_occurrence co,
  CHCO_DeID_Oct2018.ADHD_COHORT_VARIABLES cohort
WHERE 
  co.condition_concept_id = CAST(cohort.standard_concept_id AS int64)
  AND cohort.phenotype_definition_number = 6
  AND co.person_id IN (
   SELECT person_id FROM age_criteria_2
    INTERSECT DISTINCT 
   SELECT person_id FROM visit_criteria_1)
GROUP BY co.person_id, cohort.standard_code_set
)

SELECT * FROM
(SELECT person_id, code_set, 'ADHD_CASE_TYPE1' AS cohort_type FROM (
(SELECT person_id, code_set FROM age_criteria_1
INTERSECT DISTINCT 
SELECT person_id, code_set FROM dx_case_inclusion_criteria_1
INTERSECT DISTINCT 
SELECT person_id, code_set FROM rx_case_inclusion_criteria_1)
EXCEPT DISTINCT
SELECT person_id, code_set FROM dx_case_exclusion_criteria_1))

UNION ALL

(SELECT person_id, code_set, 'ADHD_CASE_TYPE2' AS cohort_type FROM (
(SELECT person_id, code_set FROM age_criteria_1
INTERSECT DISTINCT 
SELECT person_id, code_set FROM dx_case_inclusion_criteria_2)
EXCEPT DISTINCT
SELECT person_id, code_set FROM dx_case_exclusion_criteria_1))

UNION ALL

(SELECT person_id, code_set, 'ADHD_CONTROL' AS cohort_type FROM (
SELECT c.person_id, c.code_set
FROM
  CHCO_DeID_Oct2018.person p,
  ((SELECT person_id, code_set FROM rx_control_exclusion_criteria_1
  UNION DISTINCT
  SELECT person_id, code_set FROM dx_control_exclusion_criteria_1)
  INTERSECT DISTINCT
  SELECT person_id, code_set FROM dx_control_exclusion_criteria_2) c
WHERE
 p.person_id != c.person_id));
callahantiff commented 5 years ago

@mgkahn -- I will close this since there is no action required of you. Please re-open if anything needs clarification/editing.

🙏 Thank you again for your help today, I really enjoyed our meeting!

callahantiff commented 5 years ago

Optimized to improve performance (due to resource allocation errors on GBQ) - use this link to access query.

NOTE - That you will need to find-and-replace {database} with CHCO_DeID_Oct2018