OHDSI / Achilles

Automated Characterization of Health Information at Large-scale Longitudinal Evidence Systems (ACHILLES) - descriptive statistics about a OMOP CDM database
https://ohdsi.github.io/Achilles/
130 stars 122 forks source link

I don't know exactly how much space I need #705

Closed gebilaoman closed 1 year ago

gebilaoman commented 1 year ago

Describe the bug When I execute Acllies, I always get the following error:

org.postgresql.util.PSQLException: ERROR:  could not resize shared memory segment "/PostgreSQL.93228929" to 8388608 bytes: No space left on device

I don't know exactly how much space I need.

Here is my table size:

show shared_buffers;
--------
shared_buffers
2GB
schemaname relname pg_size_pretty reltuples
omop drug_exposure 10179 MB 23647400
omop measurement 10177 MB 97890472
omop fact_relationship 5411 MB 108702560
omop specimen 3221 MB 49418812
omop observation 2314 MB 21892472
omop concept 1302 MB 8715154.0
omop concept_synonym 220 MB 1924924.0
omop procedure_occurrence 216 MB 2309610.0
omop dose_era 170 MB 2327847.0
omop drug_strength 152 MB 2243782.0
omop visit_detail 131 MB 1192028.0
omop temp 98 MB 2846871.0
omop condition_occurrence 69 MB 712518.0
omop visit_occurrence 54 MB 353856.0
omop visit_detail_assign 43 MB 751970.0
omop observation_period 23 MB 353856.0
omop cohort_attribute 15 MB 228403.0
omop person 4440 kB 46520.0
omop concept_cpt4 1376 kB 16922.0
omop death 792 kB 14849.0
omop provider 456 kB 7567.0
omop relationship 104 kB 690.0
omop concept_class 64 kB 416.0
omop care_site 64 kB 318.0
omop domain 16 kB 0.0
omop cdm_source 16 kB 0.0
omop vocabulary 40 kB 59.0
omop concept_relationship 8192 bytes 0.0
omop cost 8192 bytes 0.0
omop payer_plan_period 8192 bytes 0.0
omop concept_ancestor 0 bytes 0.0
omop source_to_concept_map 8192 bytes 0.0
omop cohort 0 bytes 0.0
omop cohort_definition 8192 bytes 0.0
omop location 8192 bytes 0.0
omop note_nlp 8192 bytes 0.0
omop note 8192 bytes 0.0
omop condition_era 0 bytes 0.0
omop device_exposure 8192 bytes 0.0
omop episode 0 bytes 0.0
omop episode_event 0 bytes 0.0
omop metadata 8192 bytes 0.0
omop drug_era 0 bytes 0.0

To Reproduce Steps to reproduce the behavior:

  1. Go to '...'
  2. Click on '....'
  3. Scroll down to '....'
  4. See error

Expected behavior A clear and concise description of what you expected to happen.

Screenshots If applicable, add screenshots to help explain your problem.

Desktop (please complete the following information):

Smartphone (please complete the following information):

Additional context Add any other context about the problem here.

fdefalco commented 1 year ago

The Achilles output is a very small fraction of the size of your database, it looks like you need to increase the storage available to your postgresql environment.

gebilaoman commented 1 year ago

Because I'm using PG,I have found the root cause of the problem. It takes a long time to execute two sets of SQL, namely, tempResults_717 and s_tmpach_dist_717, among which the execution time of statsView segment is too long. I guess it is related to syntax.

-- This is the old Sql fragment

-- create table aaa_priorStats  as
--     select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated
--     from aaa_statsView s
--              join aaa_statsView p on s.stratum_id = p.stratum_id and p.rn < = s.rn
--     group by s.stratum_id, s.count_value, s.total, s.rn
--;

create table aaa_priorStats  as
select  s.stratum_id, s.count_value, s.total, s.rn,  sum(total) over(partition by s.stratum_id order by rn asc) as accumulated
from aaa_statsView s
order by s.rn  asc
;

There were two large chunks of SQL that took a long time, so I split them:

tempResults_717和 s_tmpach_dist_717

旧sql

-- 717  Distribution of quantity by drug_concept_id
--HINT DISTRIBUTE_ON_KEY(stratum_id)
CREATE TABLE tempResults_717
AS
WITH rawData(stratum_id, count_value)  AS (
    SELECT
        de.drug_concept_id AS stratum_id,
        CAST(de.quantity AS NUMERIC) AS count_value
    FROM
        omop.drug_exposure de
            JOIN
        omop.observation_period op
        ON
                    de.person_id = op.person_id
                AND
                    de.drug_exposure_start_date >= op.observation_period_start_date
                AND
                    de.drug_exposure_start_date <= op.observation_period_end_date
    WHERE
        de.quantity IS NOT NULL
),
     overallStats (stratum_id, avg_value, stdev_value, min_value, max_value, total) as
         (
             select stratum_id,
                    CAST(avg(1.0 * count_value) AS NUMERIC) as avg_value,
                    CAST(STDDEV(count_value) AS NUMERIC) as stdev_value,
                    min(count_value) as min_value,
                    max(count_value) as max_value,
                    COUNT(*) as total
             FROM rawData
             group by stratum_id
         ),
     statsView (stratum_id, count_value, total, rn) as
         (
             select stratum_id, count_value, COUNT(*) as total, row_number() over (order by count_value) as rn
             FROM rawData
             group by stratum_id, count_value
         ),
     priorStats (stratum_id, count_value, total, accumulated) as
         (
             select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated
             from statsView s
                      join statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn
             group by s.stratum_id, s.count_value, s.total, s.rn
         )
SELECT
    717 as analysis_id,
    CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id,
    o.total as count_value,
    o.min_value,
    o.max_value,
    o.avg_value,
    o.stdev_value,
    MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value,
    MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value,
    MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value,
    MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value,
    MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value
FROM
    priorStats p
        join overallStats o on p.stratum_id = o.stratum_id
GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value
;
ANALYZE tempResults_717
;
--HINT DISTRIBUTE_ON_KEY(stratum_1)
CREATE TABLE s_tmpach_dist_717
AS
SELECT
    analysis_id, stratum_id as stratum_1,
    cast(null as varchar(255)) as stratum_2, cast(null as varchar(255)) as stratum_3, cast(null as varchar(255)) as stratum_4, cast(null as varchar(255)) as stratum_5,
    count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value
FROM
    tempResults_717
;
ANALYZE s_tmpach_dist_717
;
truncate table tempResults_717;
drop table tempResults_717;

新sql

create table aaa_rawData as
    SELECT
        de.drug_concept_id AS stratum_id,
        CAST(de.quantity AS NUMERIC) AS count_value
    FROM
        omop.drug_exposure de
            JOIN
        omop.observation_period op
        ON
                    de.person_id = op.person_id
                AND
                    de.drug_exposure_start_date >= op.observation_period_start_date
                AND
                    de.drug_exposure_start_date <= op.observation_period_end_date
    WHERE
        de.quantity IS NOT NULL;

create table aaa_overallStats as
    select stratum_id,
       CAST(avg(1.0 * count_value) AS NUMERIC) as avg_value,
       CAST(STDDEV(count_value) AS NUMERIC) as stdev_value,
       min(count_value) as min_value,
       max(count_value) as max_value,
       COUNT(*) as total
FROM aaa_rawData
group by stratum_id;
select count(*) from aaa_overallStats;

create table aaa_statsView as
    select stratum_id, count_value, COUNT(*) as total, row_number() over (order by count_value) as rn
    FROM aaa_rawData
    group by stratum_id, count_value;
select count(*) from aaa_statsView;

-- create table aaa_priorStats  as
--     select s.stratum_id, s.count_value, s.total, sum(p.total) as accumulated
--     from aaa_statsView s
--              join aaa_statsView p on s.stratum_id = p.stratum_id and p.rn <= s.rn
--     group by s.stratum_id, s.count_value, s.total, s.rn
-- ;

create table aaa_priorStats  as
    select  s.stratum_id, s.count_value, s.total, s.rn, sum(total) over(partition by s.stratum_id order by rn asc) as accumulated
        from aaa_statsView s
        order by s.rn  asc
;

CREATE TABLE tempResults_717 as
SELECT
    717 as analysis_id,
    CAST(o.stratum_id AS VARCHAR(255)) AS stratum_id,
    o.total as count_value,
    o.min_value,
    o.max_value,
    o.avg_value,
    o.stdev_value,
    MIN(case when p.accumulated >= .50 * o.total then count_value else o.max_value end) as median_value,
    MIN(case when p.accumulated >= .10 * o.total then count_value else o.max_value end) as p10_value,
    MIN(case when p.accumulated >= .25 * o.total then count_value else o.max_value end) as p25_value,
    MIN(case when p.accumulated >= .75 * o.total then count_value else o.max_value end) as p75_value,
    MIN(case when p.accumulated >= .90 * o.total then count_value else o.max_value end) as p90_value
FROM
    aaa_priorStats p
        join aaa_overallStats o on p.stratum_id = o.stratum_id
GROUP BY o.stratum_id, o.total, o.min_value, o.max_value, o.avg_value, o.stdev_value
;

CREATE TABLE s_tmpach_dist_717
AS
SELECT
    analysis_id, stratum_id as stratum_1,
    cast(null as varchar(255)) as stratum_2, cast(null as varchar(255)) as stratum_3, cast(null as varchar(255)) as stratum_4, cast(null as varchar(255)) as stratum_5,
    count_value, min_value, max_value, avg_value, stdev_value, median_value, p10_value, p25_value, p75_value, p90_value
FROM
    tempResults_717
;
ANALYZE s_tmpach_dist_717;

注意有段代码:priorStats,做了新的设计,该717代码段之所以缓慢到几十小时执行不完,就是因为这块

s_tmpach_1326

旧sql


--1326 Number of records by domain by visit detail concept id
CREATE TABLE s_tmpach_1326
AS
SELECT
    1326 AS analysis_id,
    CAST(v.visit_detail_concept_id AS VARCHAR(255)) AS stratum_1,
    v.cdm_table AS stratum_2,
    CAST(NULL AS VARCHAR(255)) AS stratum_3,
    CAST(NULL AS VARCHAR(255)) AS stratum_4,
    CAST(NULL AS VARCHAR(255)) AS stratum_5,
    v.record_count AS count_value
FROM
    (
        SELECT 'drug_exposure' cdm_table,
               COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
               COUNT(*) record_count
        FROM
            omop.drug_exposure de
                LEFT JOIN
            omop.visit_detail vd
            ON
                    de.visit_occurrence_id = vd.visit_occurrence_id
        GROUP BY
            vd.visit_detail_concept_id
        UNION
        SELECT
            'condition_occurrence' cdm_table,
            COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
            COUNT(*) record_count
        FROM
            omop.condition_occurrence co
                LEFT JOIN
            omop.visit_detail vd
            ON
                    co.visit_occurrence_id = vd.visit_occurrence_id
        GROUP BY
            vd.visit_detail_concept_id
        UNION
        SELECT
            'device_exposure' cdm_table,
            COALESCE(visit_detail_concept_id, 0) visit_detail_concept_id,
            COUNT(*) record_count
        FROM
            omop.device_exposure de
                LEFT JOIN
            omop.visit_detail vd
            ON
                    de.visit_occurrence_id = vd.visit_occurrence_id
        GROUP BY
            vd.visit_detail_concept_id
        UNION
        SELECT
            'procedure_occurrence' cdm_table,
            COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
            COUNT(*) record_count
        FROM
            omop.procedure_occurrence po
                LEFT JOIN
            omop.visit_detail vd
            ON
                    po.visit_occurrence_id = vd.visit_occurrence_id
        GROUP BY
            vd.visit_detail_concept_id
        UNION
        SELECT
            'measurement' cdm_table,
            COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
            COUNT(*) record_count
        FROM
            omop.measurement m
                LEFT JOIN
            omop.visit_detail vd
            ON
                    m.visit_occurrence_id = vd.visit_occurrence_id
        GROUP BY
            vd.visit_detail_concept_id
        UNION
        SELECT
            'observation' cdm_table,
            COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
            COUNT(*) record_count
        FROM
            omop.observation o
                LEFT JOIN
            omop.visit_detail vd
            ON
                    o.visit_occurrence_id = vd.visit_occurrence_id
        GROUP BY
            vd.visit_detail_concept_id
    ) v;
ANALYZE s_tmpach_1326
;

新sql

create table aaa_s_tmpach_1326_001 as
SELECT 'drug_exposure' cdm_table,
       COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
       COUNT(*) record_count
FROM
    omop.drug_exposure de
        LEFT JOIN
    omop.visit_detail vd
    ON
            de.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY
    vd.visit_detail_concept_id
;

create table aaa_s_tmpach_1326_002 as
SELECT
    'condition_occurrence' cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
    COUNT(*) record_count
FROM
    omop.condition_occurrence co
        LEFT JOIN
    omop.visit_detail vd
    ON
            co.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY
    vd.visit_detail_concept_id
;

create table aaa_s_tmpach_1326_003 as
SELECT
    'device_exposure' cdm_table,
    COALESCE(visit_detail_concept_id, 0) visit_detail_concept_id,
    COUNT(*) record_count
FROM
    omop.device_exposure de
        LEFT JOIN
    omop.visit_detail vd
    ON
            de.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY
    vd.visit_detail_concept_id
;
create table aaa_s_tmpach_1326_004 as
SELECT
    'procedure_occurrence' cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
    COUNT(*) record_count
FROM
    omop.procedure_occurrence po
        LEFT JOIN
    omop.visit_detail vd
    ON
            po.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY
    vd.visit_detail_concept_id
;
create table aaa_s_tmpach_1326_005 as
SELECT
    'measurement' cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
    COUNT(*) record_count
FROM
    omop.measurement m
        LEFT JOIN
    omop.visit_detail vd
    ON
            m.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY
    vd.visit_detail_concept_id
;

create table aaa_s_tmpach_1326_006 as
SELECT
    'observation' cdm_table,
    COALESCE(vd.visit_detail_concept_id, 0) visit_detail_concept_id,
    COUNT(*) record_count
FROM
    omop.observation o
        LEFT JOIN
    omop.visit_detail vd
    ON
            o.visit_occurrence_id = vd.visit_occurrence_id
GROUP BY
    vd.visit_detail_concept_id
;

-- 1326 Number of records by domain by visit detail concept id
CREATE TABLE s_tmpach_1326
AS
SELECT
    1326 AS analysis_id,
    CAST(v.visit_detail_concept_id AS VARCHAR(255)) AS stratum_1,
    v.cdm_table AS stratum_2,
    CAST(NULL AS VARCHAR(255)) AS stratum_3,
    CAST(NULL AS VARCHAR(255)) AS stratum_4,
    CAST(NULL AS VARCHAR(255)) AS stratum_5,
    v.record_count AS count_value
FROM
    (
        SELECT * from aaa_s_tmpach_1326_001
        union
        SELECT * from aaa_s_tmpach_1326_002
        union
        SELECT * from aaa_s_tmpach_1326_003
        union
        SELECT * from aaa_s_tmpach_1326_004
        union
        SELECT * from aaa_s_tmpach_1326_005
    ) v;
ANALYZE s_tmpach_1326;