CapsicoHealth / Tilda

Apache License 2.0
13 stars 6 forks source link

Fully convert to "smarter" pivot implementation for all cases #100

Closed ldhasson closed 6 years ago

ldhasson commented 6 years ago

In October or so, we changed how Pivot logic was implemented to move away from Postgres' built in "crosstab" feature as it had become a serious bottleneck in more complex views.

As a result, we used a more straightforward approach using aggregates (max, sum...), but left the old way of doing things active where there were more than 1 baseline column which required a collection-type in the previous implementation. This is no longer needed.

ldhasson commented 6 years ago

-- FIRSTTEST CASE: dmvisits.visittype5daysfromsocpivotview

-- OLD Pivot view code CREATE OR REPLACE VIEW dmvisits.visittype5daysfromsocpivotview AS SELECT (final_result._wt)."visitGroupRefnum" AS "visitGroupRefnum", (final_result._wt)."visitGroupStartTZ" AS "visitGroupStartTZ", (final_result._wt)."visitGroupStart" AS "visitGroupStart", final_result."MSW", final_result."PC" FROM crosstab('-- ''An aggregate view of Patients.Visit per visitGroupRefnum pivoted on Patients.Visit.typeAtAdmission for visits within 5 days of episode start of care.'' select (DMVISITS.VALIDVISITVIEW."visitGroupRefnum" , PATIENTS.VISITGROUP."startTZ" , PATIENTS.VISITGROUP."start" )::DMVISITS.VISITTYPE5DAYSFROMSOCPIVOTVIEW_WT as _wt , trim(DMVISITS.VALIDVISITVIEW."typeAtAdmission") as "visitTypeAtAdmission" -- The type of the visit at admission , count(*) as "count" from DMVISITS.VALIDVISITVIEW inner join PATIENTS.VISITGROUP on Patients.VisitGroup."refnum" = ValidVisitView."visitGroupRefnum" where (Tilda.DaysBetween(PATIENTS.VISITGROUP."start", DMVISITS.VALIDVISITVIEW."start", false) <= 5) and DMVISITS.VALIDVISITVIEW."typeAtAdmission" in (''MSW'', ''PC'') group by 1, 2 order by 1, 2 '::text, 'select unnest(ARRAY[''MSW'', ''PC'']) as _x order by _x '::text) final_result(_wt dmvisits.visittype5daysfromsocpivotview_wt, "MSW" bigint, "PC" bigint);

-- NEW Pivot view code CREATE OR REPLACE VIEW dmvisits.visittype5daysfromsocpivotview_new AS WITH t AS ( SELECT validvisitview."visitGroupRefnum", visitgroup."startTZ" AS "visitGroupStartTZ", visitgroup.start AS "visitGroupStart", btrim(validvisitview."typeAtAdmission"::text) AS "visitTypeAtAdmission", count(*) AS count FROM dmvisits.validvisitview JOIN patients.visitgroup ON visitgroup.refnum = validvisitview."visitGroupRefnum" WHERE tilda.daysbetween(visitgroup.start, validvisitview.start, false) <= 5 AND (validvisitview."typeAtAdmission"::text = ANY (ARRAY['MSW'::character varying, 'PC'::character varying]::text[])) GROUP BY validvisitview."visitGroupRefnum", visitgroup."startTZ", visitgroup.start, (btrim(validvisitview."typeAtAdmission"::text)) ORDER BY validvisitview."visitGroupRefnum", visitgroup."startTZ", visitgroup.start, (btrim(validvisitview."typeAtAdmission"::text)) ) SELECT t."visitGroupRefnum", t."visitGroupStartTZ", t."visitGroupStart", sum(t.count) FILTER (WHERE t."visitTypeAtAdmission" = 'MSW'::text) AS "MSW", sum(t.count) FILTER (WHERE t."visitTypeAtAdmission" = 'PC'::text) AS "PC" FROM t GROUP BY t."visitGroupRefnum", t."visitGroupStartTZ", t."visitGroupStart";

-- Dumping view into tables drop table visittype5daysfromsocpivotview_t1; create table visittype5daysfromsocpivotview_t1 as select * from dmvisits.visittype5daysfromsocpivotview -- 30514

drop table visittype5daysfromsocpivotview_t2; create table visittype5daysfromsocpivotview_t2 as select * from dmvisits.visittype5daysfromsocpivotview_new -- 30514

select count() as "countAll" ,count() filter (where visittype5daysfromsocpivotview_t1."visitGroupRefnum" is null) as "countNo1" ,count() filter (where visittype5daysfromsocpivotview_t2."visitGroupRefnum" is null) as "countNo2" ,count() filter (where coalesce(visittype5daysfromsocpivotview_t1."MSW",-666) = coalesce(visittype5daysfromsocpivotview_t2."MSW", -666)) as "countMSWSame" ,count(*) filter (where coalesce(visittype5daysfromsocpivotview_t1."PC" ,-666) = coalesce(visittype5daysfromsocpivotview_t2."PC" , -666)) as "countPCSame" from visittype5daysfromsocpivotview_t1 full outer join visittype5daysfromsocpivotview_t2 on visittype5daysfromsocpivotview_t2."visitGroupRefnum" = visittype5daysfromsocpivotview_t1."visitGroupRefnum" --'countAll';'countNo1';'countNo2';'countMSWSame';'countPCSame' --30514;0;0;30514;30514

ldhasson commented 6 years ago


-- SECOND TEST CASE: datamart2.dxcurrenthistorypivotview


-- OLD Pivot view code CREATE OR REPLACE VIEW datamart2.dxcurrenthistorypivotview AS SELECT (final_result._wt)."patientRefnum" AS "patientRefnum", (final_result._wt).p AS p, final_result."ALZHEIMERS", final_result."AMI", final_result."ANEMIA", final_result."BREAST_CANCER", final_result."CAD_MI", final_result."CHF", final_result."CHRONIC_KIDNEY_DISEASE", final_result."COPD_RESPIRATORY", final_result."DEMENTIA", final_result."DIABETES", final_result."DIABETES_GEST", final_result."HIV", final_result."HYPERTENSION", final_result."IVD_AMI", final_result."LUNG_CANCER", final_result."MULTIPLE_SCLEROSIS", final_result."PARKINSONS", final_result."PNEUMONIA", final_result."PREGNANCY" FROM crosstab('-- ''A yearly-bined pivoted view of patient current diagnoses.'' select (DATAMART2.DXCURRENTHISTORYVIEW."patientRefnum" , DATAMART2.DXCURRENTHISTORYVIEW."p" )::DATAMART2.DXCURRENTHISTORYPIVOTVIEW_WT as _wt , trim(DATAMART2.DXCURRENTHISTORYVIEW."dxClass") as "dxClass" -- Formula column: Dx class , count(*) as "dxCount" from DATAMART2.DXCURRENTHISTORYVIEW where DATAMART2.DXCURRENTHISTORYVIEW."dxClass" in (''ALZHEIMERS'', ''AMI'', ''ANEMIA'', ''BREAST_CANCER'', ''CAD_MI'', ''CHF'', ''CHRONIC_KIDNEY_DISEASE'', ''COPD_RESPIRATORY'', ''DEMENTIA'', ''DIABETES'', ''DIABETES_GEST'', ''HIV'', ''HYPERTENSION'', ''IVD_AMI'', ''LUNG_CANCER'', ''MULTIPLE_SCLEROSIS'', ''PARKINSONS'', ''PNEUMONIA'', ''PREGNANCY'') group by 1, 2 order by 1, 2 '::text, 'select unnest(ARRAY[''ALZHEIMERS'', ''AMI'', ''ANEMIA'', ''BREAST_CANCER'', ''CAD_MI'', ''CHF'', ''CHRONIC_KIDNEY_DISEASE'', ''COPD_RESPIRATORY'', ''DEMENTIA'', ''DIABETES'', ''DIABETES_GEST'', ''HIV'', ''HYPERTENSION'', ''IVD_AMI'', ''LUNG_CANCER'', ''MULTIPLE_SCLEROSIS'', ''PARKINSONS'', ''PNEUMONIA'', ''PREGNANCY'']) as _x order by _x '::text) final_result(_wt datamart2.dxcurrenthistorypivotview_wt, "ALZHEIMERS" bigint, "AMI" bigint, "ANEMIA" bigint, "BREAST_CANCER" bigint, "CAD_MI" bigint, "CHF" bigint, "CHRONIC_KIDNEY_DISEASE" bigint, "COPD_RESPIRATORY" bigint, "DEMENTIA" bigint, "DIABETES" bigint, "DIABETES_GEST" bigint, "HIV" bigint, "HYPERTENSION" bigint, "IVD_AMI" bigint, "LUNG_CANCER" bigint, "MULTIPLE_SCLEROSIS" bigint, "PARKINSONS" bigint, "PNEUMONIA" bigint, "PREGNANCY" bigint);

-- NEW Pivot view code CREATE OR REPLACE VIEW datamart2.dxcurrenthistorypivotview_new AS WITH t AS ( SELECT dxcurrenthistoryview."patientRefnum", dxcurrenthistoryview.p, btrim(dxcurrenthistoryview."dxClass") AS "dxClass", count(*) AS "dxCount" FROM datamart2.dxcurrenthistoryview WHERE dxcurrenthistoryview."dxClass" = ANY (ARRAY['ALZHEIMERS'::text, 'AMI'::text, 'ANEMIA'::text, 'BREAST_CANCER'::text, 'CAD_MI'::text, 'CHF'::text, 'CHRONIC_KIDNEY_DISEASE'::text, 'COPD_RESPIRATORY'::text, 'DEMENTIA'::text, 'DIABETES'::text, 'DIABETES_GEST'::text, 'HIV'::text, 'HYPERTENSION'::text, 'IVD_AMI'::text, 'LUNG_CANCER'::text, 'MULTIPLE_SCLEROSIS'::text, 'PARKINSONS'::text, 'PNEUMONIA'::text, 'PREGNANCY'::text]) GROUP BY dxcurrenthistoryview."patientRefnum", dxcurrenthistoryview.p, (btrim(dxcurrenthistoryview."dxClass")) ORDER BY dxcurrenthistoryview."patientRefnum", dxcurrenthistoryview.p, (btrim(dxcurrenthistoryview."dxClass")) ) SELECT t."patientRefnum", t.p, sum(t."dxCount") FILTER (WHERE t."dxClass" = 'ALZHEIMERS'::text) AS "ALZHEIMERS", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'AMI'::text) AS "AMI", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'ANEMIA'::text) AS "ANEMIA", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'BREAST_CANCER'::text) AS "BREAST_CANCER", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'CAD_MI'::text) AS "CAD_MI", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'CHF'::text) AS "CHF", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'CHRONIC_KIDNEY_DISEASE'::text) AS "CHRONIC_KIDNEY_DISEASE", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'COPD_RESPIRATORY'::text) AS "COPD_RESPIRATORY", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'DEMENTIA'::text) AS "DEMENTIA", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'DIABETES'::text) AS "DIABETES", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'DIABETES_GEST'::text) AS "DIABETES_GEST", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'HIV'::text) AS "HIV", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'HYPERTENSION'::text) AS "HYPERTENSION", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'IVD_AMI'::text) AS "IVD_AMI", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'LUNG_CANCER'::text) AS "LUNG_CANCER", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'MULTIPLE_SCLEROSIS'::text) AS "MULTIPLE_SCLEROSIS", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'PARKINSONS'::text) AS "PARKINSONS", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'PNEUMONIA'::text) AS "PNEUMONIA", sum(t."dxCount") FILTER (WHERE t."dxClass" = 'PREGNANCY'::text) AS "PREGNANCY" FROM t GROUP BY t."patientRefnum", t.p;

-- Dumping view into tables drop table if exists dxcurrenthistorypivotview_T1; create table dxcurrenthistorypivotview_T1 as select * from datamart2.dxcurrenthistorypivotview -- 66037

drop table if exists dxcurrenthistorypivotview_T2; create table dxcurrenthistorypivotview_T2 as select * from datamart2.dxcurrenthistorypivotview -- 66037

-- Validating matching counts select count() as "countAll" ,count() filter (where dxcurrenthistorypivotview_T1."patientRefnum" is null) as "countNo1" ,count() filter (where dxcurrenthistorypivotview_T2."patientRefnum" is null) as "countNo2" ,count() filter (where coalesce(dxcurrenthistorypivotview_T1."ALZHEIMERS",-666) = coalesce(dxcurrenthistorypivotview_T2."ALZHEIMERS", -666)) as "countALZHEIMERSSame" ,count() filter (where coalesce(dxcurrenthistorypivotview_T1."LUNG_CANCER" ,-666) = coalesce(dxcurrenthistorypivotview_T2."LUNG_CANCER" , -666)) as "countLUNG_CANCERSame" ,count() filter (where coalesce(dxcurrenthistorypivotview_T1."HYPERTENSION" ,-666) = coalesce(dxcurrenthistorypivotview_T2."HYPERTENSION" , -666)) as "countHYPERTENSIONSame" from dxcurrenthistorypivotview_T1 full outer join dxcurrenthistorypivotview_T2 on dxcurrenthistorypivotview_T2."patientRefnum" = dxcurrenthistorypivotview_T1."patientRefnum" and dxcurrenthistorypivotview_T2."p" = dxcurrenthistorypivotview_T1."p" --'countAll';'countNo1';'countNo2';'countALZHEIMERSSame';'countLUNG_CANCERSame';'countHYPERTENSIONSame' 66037;0;0;66037;66037;66037

ldhasson commented 6 years ago

Two complex test cases of before and after testing that the new pivot views without "crosstab" are matching results from the old pivot views that used "crosstab"

CapsicoSmith commented 6 years ago

Reviewed and observed test case creation and validation with Laurent. Closing ticket.