CDCgov / prime-reportstream

ReportStream is a public intermediary tool for delivery of data between different parts of the healthcare ecosystem.
https://reportstream.cdc.gov
Creative Commons Zero v1.0 Universal
70 stars 39 forks source link

Partition Postgres for better performance #6441

Open jeremy-page opened 2 years ago

jeremy-page commented 2 years ago

Currently we do not archive/remove data from our RDS. This means that some queries will get slower and slower until they are non-functional. To address this we want to "archive" old data out the bigger tables so that data older than X is not found. To do this we probably should partition the data.

We may need to add some indexes etc also. This probably means some of the queries will need to be updated, at a minimum they should probably have a built in limit. A good article on partitioning

Applying these changes should be scripted in such a way we can apply it to the dev's local PG instances as well.

JosiahSiegel commented 1 year ago
CREATE TABLE IF NOT EXISTS public.action_log_p
(
    action_log_id bigint NOT NULL DEFAULT nextval('action_log_action_log_id_seq'::regclass),
    action_id bigint NOT NULL,
    report_id uuid,
    index integer,
    tracking_id character varying(128) COLLATE pg_catalog."default",
    type action_log_type,
    scope action_log_scope,
    detail jsonb,
    created_at timestamp with time zone NOT NULL,
    CONSTRAINT action_log_p_pkey PRIMARY KEY (action_log_id,created_at),
    CONSTRAINT action_log_p_action_id_fkey FOREIGN KEY (action_id)
        REFERENCES public.action (action_id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE CASCADE,
    CONSTRAINT action_log_p_report_id_fkey FOREIGN KEY (report_id)
        REFERENCES public.report_file (report_id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE CASCADE
)
partition by range (created_at)
WITH (
    OIDS = FALSE
)
TABLESPACE pg_default;

ALTER TABLE IF EXISTS public.action_log_p
    OWNER to prime;

CREATE INDEX IF NOT EXISTS action_log_p_created_at_idx
    ON public.action_log_p USING btree
    (created_at ASC NULLS LAST)
    TABLESPACE pg_default;

CREATE INDEX IF NOT EXISTS action_log_p_tracking_id_idx
    ON public.action_log_p USING btree
    (tracking_id COLLATE pg_catalog."default" ASC NULLS LAST)
    TABLESPACE pg_default;

CREATE INDEX IF NOT EXISTS idx_action_log_p_action
    ON public.action_log_p USING btree
    (action_id ASC NULLS LAST)
    TABLESPACE pg_default;

---
CREATE TABLE action_log_p_default
    PARTITION OF action_log_p DEFAULT;

create table action_log_p_01_2020 partition of action_log_p for values from ('2020-01-01') to ('2020-02-01');
create table action_log_p_02_2020 partition of action_log_p for values from ('2020-02-01') to ('2020-03-01');
create table action_log_p_03_2020 partition of action_log_p for values from ('2020-03-01') to ('2020-04-01');
create table action_log_p_04_2020 partition of action_log_p for values from ('2020-04-01') to ('2020-05-01');
create table action_log_p_05_2020 partition of action_log_p for values from ('2020-05-01') to ('2020-06-01');
create table action_log_p_06_2020 partition of action_log_p for values from ('2020-06-01') to ('2020-07-01');
create table action_log_p_07_2020 partition of action_log_p for values from ('2020-07-01') to ('2020-08-01');
create table action_log_p_08_2020 partition of action_log_p for values from ('2020-08-01') to ('2020-09-01');
create table action_log_p_09_2020 partition of action_log_p for values from ('2020-09-01') to ('2020-10-01');
create table action_log_p_10_2020 partition of action_log_p for values from ('2020-10-01') to ('2020-11-01');
create table action_log_p_11_2020 partition of action_log_p for values from ('2020-11-01') to ('2020-12-01');
create table action_log_p_12_2020 partition of action_log_p for values from ('2020-12-01') to ('2021-01-01');
create table action_log_p_01_2021 partition of action_log_p for values from ('2021-01-01') to ('2021-02-01');
create table action_log_p_02_2021 partition of action_log_p for values from ('2021-02-01') to ('2021-03-01');
create table action_log_p_03_2021 partition of action_log_p for values from ('2021-03-01') to ('2021-04-01');
create table action_log_p_04_2021 partition of action_log_p for values from ('2021-04-01') to ('2021-05-01');
create table action_log_p_05_2021 partition of action_log_p for values from ('2021-05-01') to ('2021-06-01');
create table action_log_p_06_2021 partition of action_log_p for values from ('2021-06-01') to ('2021-07-01');
create table action_log_p_07_2021 partition of action_log_p for values from ('2021-07-01') to ('2021-08-01');
create table action_log_p_08_2021 partition of action_log_p for values from ('2021-08-01') to ('2021-09-01');
create table action_log_p_09_2021 partition of action_log_p for values from ('2021-09-01') to ('2021-10-01');
create table action_log_p_10_2021 partition of action_log_p for values from ('2021-10-01') to ('2021-11-01');
create table action_log_p_11_2021 partition of action_log_p for values from ('2021-11-01') to ('2021-12-01');
create table action_log_p_12_2021 partition of action_log_p for values from ('2021-12-01') to ('2022-01-01');
create table action_log_p_01_2022 partition of action_log_p for values from ('2022-01-01') to ('2022-02-01');
create table action_log_p_02_2022 partition of action_log_p for values from ('2022-02-01') to ('2022-03-01');
create table action_log_p_03_2022 partition of action_log_p for values from ('2022-03-01') to ('2022-04-01');
create table action_log_p_04_2022 partition of action_log_p for values from ('2022-04-01') to ('2022-05-01');
create table action_log_p_05_2022 partition of action_log_p for values from ('2022-05-01') to ('2022-06-01');
create table action_log_p_06_2022 partition of action_log_p for values from ('2022-06-01') to ('2022-07-01');
create table action_log_p_07_2022 partition of action_log_p for values from ('2022-07-01') to ('2022-08-01');
create table action_log_p_08_2022 partition of action_log_p for values from ('2022-08-01') to ('2022-09-01');
create table action_log_p_09_2022 partition of action_log_p for values from ('2022-09-01') to ('2022-10-01');
create table action_log_p_10_2022 partition of action_log_p for values from ('2022-10-01') to ('2022-11-01');
create table action_log_p_11_2022 partition of action_log_p for values from ('2022-11-01') to ('2022-12-01');
create table action_log_p_12_2022 partition of action_log_p for values from ('2022-12-01') to ('2023-01-01');
create table action_log_p_01_2023 partition of action_log_p for values from ('2023-01-01') to ('2023-02-01');
create table action_log_p_02_2023 partition of action_log_p for values from ('2023-02-01') to ('2023-03-01');
create table action_log_p_03_2023 partition of action_log_p for values from ('2023-03-01') to ('2023-04-01');
create table action_log_p_04_2023 partition of action_log_p for values from ('2023-04-01') to ('2023-05-01');
create table action_log_p_05_2023 partition of action_log_p for values from ('2023-05-01') to ('2023-06-01');
create table action_log_p_06_2023 partition of action_log_p for values from ('2023-06-01') to ('2023-07-01');
create table action_log_p_07_2023 partition of action_log_p for values from ('2023-07-01') to ('2023-08-01');
create table action_log_p_08_2023 partition of action_log_p for values from ('2023-08-01') to ('2023-09-01');
create table action_log_p_09_2023 partition of action_log_p for values from ('2023-09-01') to ('2023-10-01');
create table action_log_p_10_2023 partition of action_log_p for values from ('2023-10-01') to ('2023-11-01');
create table action_log_p_11_2023 partition of action_log_p for values from ('2023-11-01') to ('2023-12-01');
create table action_log_p_12_2023 partition of action_log_p for values from ('2023-12-01') to ('2024-01-01');
create table action_log_p_01_2024 partition of action_log_p for values from ('2024-01-01') to ('2024-02-01');
create table action_log_p_02_2024 partition of action_log_p for values from ('2024-02-01') to ('2024-03-01');
create table action_log_p_03_2024 partition of action_log_p for values from ('2024-03-01') to ('2024-04-01');
create table action_log_p_04_2024 partition of action_log_p for values from ('2024-04-01') to ('2024-05-01');
create table action_log_p_05_2024 partition of action_log_p for values from ('2024-05-01') to ('2024-06-01');
create table action_log_p_06_2024 partition of action_log_p for values from ('2024-06-01') to ('2024-07-01');
create table action_log_p_07_2024 partition of action_log_p for values from ('2024-07-01') to ('2024-08-01');
create table action_log_p_08_2024 partition of action_log_p for values from ('2024-08-01') to ('2024-09-01');
create table action_log_p_09_2024 partition of action_log_p for values from ('2024-09-01') to ('2024-10-01');
create table action_log_p_10_2024 partition of action_log_p for values from ('2024-10-01') to ('2024-11-01');
create table action_log_p_11_2024 partition of action_log_p for values from ('2024-11-01') to ('2024-12-01');
create table action_log_p_12_2024 partition of action_log_p for values from ('2024-12-01') to ('2025-01-01');
create table action_log_p_01_2025 partition of action_log_p for values from ('2025-01-01') to ('2025-02-01');
create table action_log_p_02_2025 partition of action_log_p for values from ('2025-02-01') to ('2025-03-01');
create table action_log_p_03_2025 partition of action_log_p for values from ('2025-03-01') to ('2025-04-01');
create table action_log_p_04_2025 partition of action_log_p for values from ('2025-04-01') to ('2025-05-01');
create table action_log_p_05_2025 partition of action_log_p for values from ('2025-05-01') to ('2025-06-01');
create table action_log_p_06_2025 partition of action_log_p for values from ('2025-06-01') to ('2025-07-01');
create table action_log_p_07_2025 partition of action_log_p for values from ('2025-07-01') to ('2025-08-01');
create table action_log_p_08_2025 partition of action_log_p for values from ('2025-08-01') to ('2025-09-01');
create table action_log_p_09_2025 partition of action_log_p for values from ('2025-09-01') to ('2025-10-01');
create table action_log_p_10_2025 partition of action_log_p for values from ('2025-10-01') to ('2025-11-01');
create table action_log_p_11_2025 partition of action_log_p for values from ('2025-11-01') to ('2025-12-01');
create table action_log_p_12_2025 partition of action_log_p for values from ('2025-12-01') to ('2026-01-01');
create table action_log_p_01_2026 partition of action_log_p for values from ('2026-01-01') to ('2026-02-01');
create table action_log_p_02_2026 partition of action_log_p for values from ('2026-02-01') to ('2026-03-01');
create table action_log_p_03_2026 partition of action_log_p for values from ('2026-03-01') to ('2026-04-01');
create table action_log_p_04_2026 partition of action_log_p for values from ('2026-04-01') to ('2026-05-01');
create table action_log_p_05_2026 partition of action_log_p for values from ('2026-05-01') to ('2026-06-01');
create table action_log_p_06_2026 partition of action_log_p for values from ('2026-06-01') to ('2026-07-01');
create table action_log_p_07_2026 partition of action_log_p for values from ('2026-07-01') to ('2026-08-01');
create table action_log_p_08_2026 partition of action_log_p for values from ('2026-08-01') to ('2026-09-01');
create table action_log_p_09_2026 partition of action_log_p for values from ('2026-09-01') to ('2026-10-01');
create table action_log_p_10_2026 partition of action_log_p for values from ('2026-10-01') to ('2026-11-01');
create table action_log_p_11_2026 partition of action_log_p for values from ('2026-11-01') to ('2026-12-01');
create table action_log_p_12_2026 partition of action_log_p for values from ('2026-12-01') to ('2027-01-01');
create table action_log_p_01_2027 partition of action_log_p for values from ('2027-01-01') to ('2027-02-01');
create table action_log_p_02_2027 partition of action_log_p for values from ('2027-02-01') to ('2027-03-01');
create table action_log_p_03_2027 partition of action_log_p for values from ('2027-03-01') to ('2027-04-01');
create table action_log_p_04_2027 partition of action_log_p for values from ('2027-04-01') to ('2027-05-01');
create table action_log_p_05_2027 partition of action_log_p for values from ('2027-05-01') to ('2027-06-01');
create table action_log_p_06_2027 partition of action_log_p for values from ('2027-06-01') to ('2027-07-01');
create table action_log_p_07_2027 partition of action_log_p for values from ('2027-07-01') to ('2027-08-01');
create table action_log_p_08_2027 partition of action_log_p for values from ('2027-08-01') to ('2027-09-01');
create table action_log_p_09_2027 partition of action_log_p for values from ('2027-09-01') to ('2027-10-01');
create table action_log_p_10_2027 partition of action_log_p for values from ('2027-10-01') to ('2027-11-01');
create table action_log_p_11_2027 partition of action_log_p for values from ('2027-11-01') to ('2027-12-01');
create table action_log_p_12_2027 partition of action_log_p for values from ('2027-12-01') to ('2028-01-01');
create table action_log_p_01_2028 partition of action_log_p for values from ('2028-01-01') to ('2028-02-01');
create table action_log_p_02_2028 partition of action_log_p for values from ('2028-02-01') to ('2028-03-01');
create table action_log_p_03_2028 partition of action_log_p for values from ('2028-03-01') to ('2028-04-01');
create table action_log_p_04_2028 partition of action_log_p for values from ('2028-04-01') to ('2028-05-01');
create table action_log_p_05_2028 partition of action_log_p for values from ('2028-05-01') to ('2028-06-01');
create table action_log_p_06_2028 partition of action_log_p for values from ('2028-06-01') to ('2028-07-01');
create table action_log_p_07_2028 partition of action_log_p for values from ('2028-07-01') to ('2028-08-01');
create table action_log_p_08_2028 partition of action_log_p for values from ('2028-08-01') to ('2028-09-01');
create table action_log_p_09_2028 partition of action_log_p for values from ('2028-09-01') to ('2028-10-01');
create table action_log_p_10_2028 partition of action_log_p for values from ('2028-10-01') to ('2028-11-01');
create table action_log_p_11_2028 partition of action_log_p for values from ('2028-11-01') to ('2028-12-01');
create table action_log_p_12_2028 partition of action_log_p for values from ('2028-12-01') to ('2029-01-01');
create table action_log_p_01_2029 partition of action_log_p for values from ('2029-01-01') to ('2029-02-01');
create table action_log_p_02_2029 partition of action_log_p for values from ('2029-02-01') to ('2029-03-01');
create table action_log_p_03_2029 partition of action_log_p for values from ('2029-03-01') to ('2029-04-01');
create table action_log_p_04_2029 partition of action_log_p for values from ('2029-04-01') to ('2029-05-01');
create table action_log_p_05_2029 partition of action_log_p for values from ('2029-05-01') to ('2029-06-01');
create table action_log_p_06_2029 partition of action_log_p for values from ('2029-06-01') to ('2029-07-01');
create table action_log_p_07_2029 partition of action_log_p for values from ('2029-07-01') to ('2029-08-01');
create table action_log_p_08_2029 partition of action_log_p for values from ('2029-08-01') to ('2029-09-01');
create table action_log_p_09_2029 partition of action_log_p for values from ('2029-09-01') to ('2029-10-01');
create table action_log_p_10_2029 partition of action_log_p for values from ('2029-10-01') to ('2029-11-01');
create table action_log_p_11_2029 partition of action_log_p for values from ('2029-11-01') to ('2029-12-01');
create table action_log_p_12_2029 partition of action_log_p for values from ('2029-12-01') to ('2030-01-01');
create table action_log_p_01_2030 partition of action_log_p for values from ('2030-01-01') to ('2030-02-01');
create table action_log_p_02_2030 partition of action_log_p for values from ('2030-02-01') to ('2030-03-01');
create table action_log_p_03_2030 partition of action_log_p for values from ('2030-03-01') to ('2030-04-01');
create table action_log_p_04_2030 partition of action_log_p for values from ('2030-04-01') to ('2030-05-01');
create table action_log_p_05_2030 partition of action_log_p for values from ('2030-05-01') to ('2030-06-01');
create table action_log_p_06_2030 partition of action_log_p for values from ('2030-06-01') to ('2030-07-01');
create table action_log_p_07_2030 partition of action_log_p for values from ('2030-07-01') to ('2030-08-01');
create table action_log_p_08_2030 partition of action_log_p for values from ('2030-08-01') to ('2030-09-01');
create table action_log_p_09_2030 partition of action_log_p for values from ('2030-09-01') to ('2030-10-01');
create table action_log_p_10_2030 partition of action_log_p for values from ('2030-10-01') to ('2030-11-01');
create table action_log_p_11_2030 partition of action_log_p for values from ('2030-11-01') to ('2030-12-01');
create table action_log_p_12_2030 partition of action_log_p for values from ('2030-12-01') to ('2031-01-01');
--

insert into action_log_p select * from action_log;
JosiahSiegel commented 1 year ago

Thread discussion unusually sized months in action_log and potential cleanup: https://semanticbits.slack.com/archives/C01HFHPNJEL/p1671633772974789