Closed gebilaoman closed 1 year ago
The Achilles output is a very small fraction of the size of your database, it looks like you need to increase the storage available to your postgresql environment.
Because I'm using PG,I have found the root cause of the problem. It takes a long time to execute two sets of SQL, namely, tempResults_717 and s_tmpach_dist_717, among which the execution time of statsView segment is too long. I guess it is related to syntax.
-- This is the old Sql fragment
-- create table aaa_priorStats as
-- select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated
-- from aaa_statsView s
-- join aaa_statsView p on s.stratum_id = p.stratum_id and p.rn < = s.rn
-- group by s.stratum_id, s.count_value, s.total, s.rn
--;
-- Running total of `total` within each stratum, accumulated in ascending rn order.
-- A window function is used instead of self-joining aaa_statsView on p.rn <= s.rn,
-- which pairs every row of a stratum with every earlier row of that stratum.
-- The frame is spelled out as ROWS so the sum is a plain row-by-row running total.
-- No ORDER BY on the statement itself: the row order of a table is not guaranteed
-- when it is read back, so sorting the insert would only add cost.
CREATE TABLE aaa_priorStats AS
SELECT
    s.stratum_id,
    s.count_value,
    s.total,
    s.rn,
    SUM(s.total) OVER (
        PARTITION BY s.stratum_id
        ORDER BY s.rn
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS accumulated
FROM aaa_statsView AS s
;
There were two large chunks of SQL that took a long time, so I split them:
-- 717 Distribution of quantity by drug_concept_id
--HINT DISTRIBUTE_ON_KEY(stratum_id)
CREATE TABLE tempResults_717
AS
WITH rawData (stratum_id, count_value) AS (
    -- One row per drug_exposure row with a non-null quantity, for each observation
    -- period of the same person that contains the exposure start date.
    SELECT
        de.drug_concept_id AS stratum_id,
        CAST(de.quantity AS NUMERIC) AS count_value
    FROM omop.drug_exposure AS de
    INNER JOIN omop.observation_period AS op
        ON de.person_id = op.person_id
        AND de.drug_exposure_start_date >= op.observation_period_start_date
        AND de.drug_exposure_start_date <= op.observation_period_end_date
    WHERE de.quantity IS NOT NULL
),
overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) AS (
    -- Per-stratum summary statistics and row count.
    SELECT
        stratum_id,
        CAST(AVG(1.0 * count_value) AS NUMERIC) AS avg_value,
        CAST(STDDEV(count_value) AS NUMERIC) AS stdev_value,
        MIN(count_value) AS min_value,
        MAX(count_value) AS max_value,
        COUNT(*) AS total
    FROM rawData
    GROUP BY stratum_id
),
statsView (stratum_id, count_value, total, rn) AS (
    -- Frequency of each distinct value per stratum. rn numbers the
    -- (stratum_id, count_value) groups in ascending count_value order across all strata.
    SELECT
        stratum_id,
        count_value,
        COUNT(*) AS total,
        ROW_NUMBER() OVER (ORDER BY count_value) AS rn
    FROM rawData
    GROUP BY stratum_id, count_value
),
priorStats (stratum_id, count_value, total, accumulated) AS (
    -- Running total of the frequencies within each stratum, in rn order.
    -- Computed with a window function rather than a self-join of statsView on
    -- p.rn <= s.rn, which compares every row of a stratum with every earlier row.
    -- rn is a row number, so each row has a distinct position in the ordering.
    SELECT
        s.stratum_id,
        s.count_value,
        s.total,
        SUM(s.total) OVER (
            PARTITION BY s.stratum_id
            ORDER BY s.rn
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS accumulated
    FROM statsView AS s
)
SELECT
    717 AS analysis_id,
    CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id,
    o.total AS count_value,
    o.min_value,
    o.max_value,
    o.avg_value,
    o.stdev_value,
    -- Each percentile is the smallest value whose running total reaches the given
    -- fraction of the stratum's row count; rows below the threshold contribute max_value.
    MIN(CASE WHEN p.accumulated >= .50 * o.total THEN p.count_value ELSE o.max_value END) AS median_value,
    MIN(CASE WHEN p.accumulated >= .10 * o.total THEN p.count_value ELSE o.max_value END) AS p10_value,
    MIN(CASE WHEN p.accumulated >= .25 * o.total THEN p.count_value ELSE o.max_value END) AS p25_value,
    MIN(CASE WHEN p.accumulated >= .75 * o.total THEN p.count_value ELSE o.max_value END) AS p75_value,
    MIN(CASE WHEN p.accumulated >= .90 * o.total THEN p.count_value ELSE o.max_value END) AS p90_value
FROM priorStats AS p
INNER JOIN overallStats AS o
    ON p.stratum_id = o.stratum_id
GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value
;
ANALYZE tempResults_717
;
--HINT DISTRIBUTE_ON_KEY(stratum_1)
CREATE TABLE s_tmpach_dist_717
AS
-- Reshape tempResults_717: stratum_id becomes stratum_1 and stratum_2..5 are
-- filled with typed NULLs.
SELECT
    r.analysis_id,
    r.stratum_id AS stratum_1,
    CAST(NULL AS VARCHAR(255)) AS stratum_2,
    CAST(NULL AS VARCHAR(255)) AS stratum_3,
    CAST(NULL AS VARCHAR(255)) AS stratum_4,
    CAST(NULL AS VARCHAR(255)) AS stratum_5,
    r.count_value,
    r.min_value,
    r.max_value,
    r.avg_value,
    r.stdev_value,
    r.median_value,
    r.p10_value,
    r.p25_value,
    r.p75_value,
    r.p90_value
FROM tempResults_717 AS r
;
ANALYZE s_tmpach_dist_717
;
-- The intermediate table is emptied and then removed once it has been copied.
TRUNCATE TABLE tempResults_717;
DROP TABLE tempResults_717;
-- Staging table for analysis 717: drug_concept_id and numeric quantity of every
-- drug_exposure row with a non-null quantity, for each observation period of the
-- same person that contains the exposure start date.
CREATE TABLE aaa_rawData AS
SELECT
    de.drug_concept_id AS stratum_id,
    CAST(de.quantity AS NUMERIC) AS count_value
FROM omop.drug_exposure AS de
INNER JOIN omop.observation_period AS op
    ON de.person_id = op.person_id
    AND de.drug_exposure_start_date >= op.observation_period_start_date
    AND de.drug_exposure_start_date <= op.observation_period_end_date
WHERE de.quantity IS NOT NULL;
-- Per-stratum summary of aaa_rawData: mean, standard deviation, minimum, maximum
-- and row count of count_value.
CREATE TABLE aaa_overallStats AS
SELECT
    stratum_id,
    CAST(AVG(1.0 * count_value) AS NUMERIC) AS avg_value,
    CAST(STDDEV(count_value) AS NUMERIC) AS stdev_value,
    MIN(count_value) AS min_value,
    MAX(count_value) AS max_value,
    COUNT(*) AS total
FROM aaa_rawData
GROUP BY stratum_id;
-- Row-count check on the table just created.
SELECT COUNT(*) FROM aaa_overallStats;
-- Frequency of each distinct count_value per stratum. rn numbers the
-- (stratum_id, count_value) groups in ascending count_value order across all strata.
CREATE TABLE aaa_statsView AS
SELECT
    stratum_id,
    count_value,
    COUNT(*) AS total,
    ROW_NUMBER() OVER (ORDER BY count_value) AS rn
FROM aaa_rawData
GROUP BY
    stratum_id,
    count_value;
-- Row-count check on the table just created.
SELECT COUNT(*) FROM aaa_statsView;
-- create table aaa_priorStats as
-- select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated
-- from aaa_statsView s
-- join aaa_statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn
-- group by s.stratum_id, s.count_value, s.total, s.rn
-- ;
-- Running total of `total` within each stratum, accumulated in ascending rn order.
-- A window function is used instead of self-joining aaa_statsView on p.rn <= s.rn,
-- which pairs every row of a stratum with every earlier row of that stratum.
-- The frame is spelled out as ROWS so the sum is a plain row-by-row running total.
-- No ORDER BY on the statement itself: the row order of a table is not guaranteed
-- when it is read back, so sorting the insert would only add cost.
CREATE TABLE aaa_priorStats AS
SELECT
    s.stratum_id,
    s.count_value,
    s.total,
    s.rn,
    SUM(s.total) OVER (
        PARTITION BY s.stratum_id
        ORDER BY s.rn
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS accumulated
FROM aaa_statsView AS s
;
-- Final result for analysis 717, built from the staging tables aaa_priorStats
-- (running totals) and aaa_overallStats (per-stratum summary).
CREATE TABLE tempResults_717 AS
SELECT
    717 AS analysis_id,
    CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id,
    o.total AS count_value,
    o.min_value,
    o.max_value,
    o.avg_value,
    o.stdev_value,
    -- Each percentile is the smallest value whose running total reaches the given
    -- fraction of the stratum's row count; rows below the threshold contribute max_value.
    MIN(CASE WHEN p.accumulated >= .50 * o.total THEN p.count_value ELSE o.max_value END) AS median_value,
    MIN(CASE WHEN p.accumulated >= .10 * o.total THEN p.count_value ELSE o.max_value END) AS p10_value,
    MIN(CASE WHEN p.accumulated >= .25 * o.total THEN p.count_value ELSE o.max_value END) AS p25_value,
    MIN(CASE WHEN p.accumulated >= .75 * o.total THEN p.count_value ELSE o.max_value END) AS p75_value,
    MIN(CASE WHEN p.accumulated >= .90 * o.total THEN p.count_value ELSE o.max_value END) AS p90_value
FROM aaa_priorStats AS p
INNER JOIN aaa_overallStats AS o
    ON p.stratum_id = o.stratum_id
GROUP BY
    o.stratum_id,
    o.total,
    o.min_value,
    o.max_value,
    o.avg_value,
    o.stdev_value
;
-- Reshape tempResults_717: stratum_id becomes stratum_1 and stratum_2..5 are
-- filled with typed NULLs.
CREATE TABLE s_tmpach_dist_717
AS
SELECT
    r.analysis_id,
    r.stratum_id AS stratum_1,
    CAST(NULL AS VARCHAR(255)) AS stratum_2,
    CAST(NULL AS VARCHAR(255)) AS stratum_3,
    CAST(NULL AS VARCHAR(255)) AS stratum_4,
    CAST(NULL AS VARCHAR(255)) AS stratum_5,
    r.count_value,
    r.min_value,
    r.max_value,
    r.avg_value,
    r.stdev_value,
    r.median_value,
    r.p10_value,
    r.p25_value,
    r.p75_value,
    r.p90_value
FROM tempResults_717 AS r
;
ANALYZE s_tmpach_dist_717;
注意有段代码:priorStats,做了新的设计,该717代码段之所以缓慢到几十小时执行不完,就是因为这块
--1326 Number of records by domain by visit detail concept id
-- One row per (visit detail concept id, domain table). Records with no matching
-- visit_detail row (NULL concept) are reported under concept id 0.
-- Each branch groups by the COALESCE-d value, so NULL and an actual concept id of 0
-- fall into the same group and a domain yields at most one row per stratum_1.
-- The branches carry different cdm_table literals and therefore never overlap,
-- which is why UNION ALL is used: no duplicate elimination is needed.
-- NOTE(review): every branch joins on visit_occurrence_id, so a record is counted
-- once per visit_detail row of its visit; confirm that this, rather than a join on
-- a visit detail identifier, is intended.
CREATE TABLE s_tmpach_1326
AS
SELECT
    1326 AS analysis_id,
    CAST(v.visit_detail_concept_id AS VARCHAR(255)) AS stratum_1,
    v.cdm_table AS stratum_2,
    CAST(NULL AS VARCHAR(255)) AS stratum_3,
    CAST(NULL AS VARCHAR(255)) AS stratum_4,
    CAST(NULL AS VARCHAR(255)) AS stratum_5,
    v.record_count AS count_value
FROM (
    SELECT
        'drug_exposure' AS cdm_table,
        COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
        COUNT(*) AS record_count
    FROM omop.drug_exposure AS de
    LEFT JOIN omop.visit_detail AS vd
        ON de.visit_occurrence_id = vd.visit_occurrence_id
    GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
    UNION ALL
    SELECT
        'condition_occurrence' AS cdm_table,
        COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
        COUNT(*) AS record_count
    FROM omop.condition_occurrence AS co
    LEFT JOIN omop.visit_detail AS vd
        ON co.visit_occurrence_id = vd.visit_occurrence_id
    GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
    UNION ALL
    SELECT
        'device_exposure' AS cdm_table,
        COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
        COUNT(*) AS record_count
    FROM omop.device_exposure AS de
    LEFT JOIN omop.visit_detail AS vd
        ON de.visit_occurrence_id = vd.visit_occurrence_id
    GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
    UNION ALL
    SELECT
        'procedure_occurrence' AS cdm_table,
        COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
        COUNT(*) AS record_count
    FROM omop.procedure_occurrence AS po
    LEFT JOIN omop.visit_detail AS vd
        ON po.visit_occurrence_id = vd.visit_occurrence_id
    GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
    UNION ALL
    SELECT
        'measurement' AS cdm_table,
        COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
        COUNT(*) AS record_count
    FROM omop.measurement AS m
    LEFT JOIN omop.visit_detail AS vd
        ON m.visit_occurrence_id = vd.visit_occurrence_id
    GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
    UNION ALL
    SELECT
        'observation' AS cdm_table,
        COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
        COUNT(*) AS record_count
    FROM omop.observation AS o
    LEFT JOIN omop.visit_detail AS vd
        ON o.visit_occurrence_id = vd.visit_occurrence_id
    GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
) AS v;
ANALYZE s_tmpach_1326
;
-- Staging table for analysis 1326: drug_exposure record counts per visit detail concept.
-- Records with no matching visit_detail row (NULL concept) are reported under concept id 0.
-- Grouping by the COALESCE-d value puts NULL and an actual concept id of 0 into the
-- same group, so the table holds at most one row per visit_detail_concept_id.
-- NOTE(review): the join is on visit_occurrence_id, so a record is counted once per
-- visit_detail row of its visit; confirm that this is intended.
CREATE TABLE aaa_s_tmpach_1326_001 AS
SELECT
    'drug_exposure' AS cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
    COUNT(*) AS record_count
FROM omop.drug_exposure AS de
LEFT JOIN omop.visit_detail AS vd
    ON de.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
;
-- Staging table for analysis 1326: condition_occurrence record counts per visit detail concept.
-- Records with no matching visit_detail row (NULL concept) are reported under concept id 0.
-- Grouping by the COALESCE-d value puts NULL and an actual concept id of 0 into the
-- same group, so the table holds at most one row per visit_detail_concept_id.
-- NOTE(review): the join is on visit_occurrence_id, so a record is counted once per
-- visit_detail row of its visit; confirm that this is intended.
CREATE TABLE aaa_s_tmpach_1326_002 AS
SELECT
    'condition_occurrence' AS cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
    COUNT(*) AS record_count
FROM omop.condition_occurrence AS co
LEFT JOIN omop.visit_detail AS vd
    ON co.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
;
-- Staging table for analysis 1326: device_exposure record counts per visit detail concept.
-- Records with no matching visit_detail row (NULL concept) are reported under concept id 0.
-- Grouping by the COALESCE-d value puts NULL and an actual concept id of 0 into the
-- same group, so the table holds at most one row per visit_detail_concept_id.
-- visit_detail_concept_id is qualified with vd so it reads the same as the GROUP BY
-- expression and does not depend on which joined table exposes that column name.
-- NOTE(review): the join is on visit_occurrence_id, so a record is counted once per
-- visit_detail row of its visit; confirm that this is intended.
CREATE TABLE aaa_s_tmpach_1326_003 AS
SELECT
    'device_exposure' AS cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
    COUNT(*) AS record_count
FROM omop.device_exposure AS de
LEFT JOIN omop.visit_detail AS vd
    ON de.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
;
-- Staging table for analysis 1326: procedure_occurrence record counts per visit detail concept.
-- Records with no matching visit_detail row (NULL concept) are reported under concept id 0.
-- Grouping by the COALESCE-d value puts NULL and an actual concept id of 0 into the
-- same group, so the table holds at most one row per visit_detail_concept_id.
-- NOTE(review): the join is on visit_occurrence_id, so a record is counted once per
-- visit_detail row of its visit; confirm that this is intended.
CREATE TABLE aaa_s_tmpach_1326_004 AS
SELECT
    'procedure_occurrence' AS cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
    COUNT(*) AS record_count
FROM omop.procedure_occurrence AS po
LEFT JOIN omop.visit_detail AS vd
    ON po.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
;
-- Staging table for analysis 1326: measurement record counts per visit detail concept.
-- Records with no matching visit_detail row (NULL concept) are reported under concept id 0.
-- Grouping by the COALESCE-d value puts NULL and an actual concept id of 0 into the
-- same group, so the table holds at most one row per visit_detail_concept_id.
-- NOTE(review): the join is on visit_occurrence_id, so a record is counted once per
-- visit_detail row of its visit; confirm that this is intended.
CREATE TABLE aaa_s_tmpach_1326_005 AS
SELECT
    'measurement' AS cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
    COUNT(*) AS record_count
FROM omop.measurement AS m
LEFT JOIN omop.visit_detail AS vd
    ON m.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
;
-- Staging table for analysis 1326: observation record counts per visit detail concept.
-- Records with no matching visit_detail row (NULL concept) are reported under concept id 0.
-- Grouping by the COALESCE-d value puts NULL and an actual concept id of 0 into the
-- same group, so the table holds at most one row per visit_detail_concept_id.
-- NOTE(review): the join is on visit_occurrence_id, so a record is counted once per
-- visit_detail row of its visit; confirm that this is intended.
CREATE TABLE aaa_s_tmpach_1326_006 AS
SELECT
    'observation' AS cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) AS visit_detail_concept_id,
    COUNT(*) AS record_count
FROM omop.observation AS o
LEFT JOIN omop.visit_detail AS vd
    ON o.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY COALESCE(vd.visit_detail_concept_id, 0)
;
-- 1326 Number of records by domain by visit detail concept id
-- Assembles the result from the per-domain staging tables aaa_s_tmpach_1326_001..006.
-- All six staging tables are listed; leaving out aaa_s_tmpach_1326_006 would drop
-- the observation domain from the result.
-- Columns are named explicitly so the branches line up by name rather than by the
-- physical column order of each staging table.
-- UNION ALL: each staging table carries its own cdm_table literal, so rows from
-- different tables never coincide and no duplicate elimination is needed.
CREATE TABLE s_tmpach_1326
AS
SELECT
    1326 AS analysis_id,
    CAST(v.visit_detail_concept_id AS VARCHAR(255)) AS stratum_1,
    v.cdm_table AS stratum_2,
    CAST(NULL AS VARCHAR(255)) AS stratum_3,
    CAST(NULL AS VARCHAR(255)) AS stratum_4,
    CAST(NULL AS VARCHAR(255)) AS stratum_5,
    v.record_count AS count_value
FROM (
    SELECT cdm_table, visit_detail_concept_id, record_count FROM aaa_s_tmpach_1326_001
    UNION ALL
    SELECT cdm_table, visit_detail_concept_id, record_count FROM aaa_s_tmpach_1326_002
    UNION ALL
    SELECT cdm_table, visit_detail_concept_id, record_count FROM aaa_s_tmpach_1326_003
    UNION ALL
    SELECT cdm_table, visit_detail_concept_id, record_count FROM aaa_s_tmpach_1326_004
    UNION ALL
    SELECT cdm_table, visit_detail_concept_id, record_count FROM aaa_s_tmpach_1326_005
    UNION ALL
    SELECT cdm_table, visit_detail_concept_id, record_count FROM aaa_s_tmpach_1326_006
) AS v;
ANALYZE s_tmpach_1326;
Describe the bug When I execute Acllies, I always get the following error:
I don't know exactly how much space I need.
Here is my table size:
To Reproduce Steps to reproduce the behavior:
Expected behavior A clear and concise description of what you expected to happen.
Screenshots If applicable, add screenshots to help explain your problem.
Desktop (please complete the following information):
Smartphone (please complete the following information):
Additional context Add any other context about the problem here.