CityofToronto / bdit_pgutils

Useful postgresql functions for our work
GNU General Public License v3.0
0 stars 0 forks source link

Infinite recursion in public.deps_save_and_drop_dependencies_dryrun when dependencies reference each other #15

Open gabrielwol opened 11 months ago

gabrielwol commented 11 months ago

When trying to save the dependencies for traffic.arterydata, we encountered an infinite recursion. I narrowed it down to: VIEW open_data.volumes_atr_vehicles_shortterm and MATERIALIZED VIEW open_data_staging.volumes_atr_shortterm_exceptions which refer to each other.

Full definitions: ```sql DROP VIEW open_data.volumes_atr_vehicles_shortterm; CREATE OR REPLACE VIEW open_data.volumes_atr_vehicles_shortterm AS SELECT flow_atr.centreline_id, flow_atr.direction, flow_atr.location, flow_atr.class_type, flow_atr.datetime_bin, flow_atr.volume_15min FROM open_data.flow_atr WHERE flow_atr.station_type = 'Short Term'::text AND flow_atr.volume_15min >= 0::numeric AND NOT (EXISTS ( SELECT exceptions.datetime_bin, exceptions.location FROM gwolofs.volumes_atr_shortterm_exceptions exceptions WHERE exceptions.datetime_bin = flow_atr.datetime_bin::date AND exceptions.location::text = flow_atr.location::text)); CREATE MATERIALIZED VIEW IF NOT EXISTS open_data_staging.volumes_atr_shortterm_exceptions TABLESPACE pg_default AS SELECT o1.datetime_bin::date AS datetime_bin, o1.location FROM open_data.volumes_atr_vehicles_shortterm o1 JOIN open_data.volumes_atr_vehicles_shortterm o2 ON o1.location::text = o2.location::text WHERE CASE WHEN (o1.datetime_bin - o2.datetime_bin) < '00:00:00'::interval THEN - (o1.datetime_bin - o2.datetime_bin) ELSE o1.datetime_bin - o2.datetime_bin END <= '01:00:00'::interval AND (o1.volume_15min > 20::numeric AND o2.volume_15min = 0::numeric OR o1.volume_15min < 450::numeric AND o2.volume_15min > 1000::numeric) UNION SELECT o2.datetime_bin::date AS datetime_bin, o2.location FROM open_data.volumes_atr_vehicles_shortterm o1 JOIN open_data.volumes_atr_vehicles_shortterm o2 ON o1.location::text = o2.location::text WHERE CASE WHEN (o1.datetime_bin - o2.datetime_bin) < '00:00:00'::interval THEN - (o1.datetime_bin - o2.datetime_bin) ELSE o1.datetime_bin - o2.datetime_bin END <= '01:00:00'::interval AND (o1.volume_15min > 20::numeric AND o2.volume_15min = 0::numeric OR o1.volume_15min < 450::numeric AND o2.volume_15min > 1000::numeric) UNION SELECT v.datetime_bin::date AS datetime_bin, v.location FROM open_data.volumes_atr_vehicles_shortterm v GROUP BY (v.datetime_bin::date), v.location HAVING count(*) <= 3 AND (max(v.volume_15min) > 1000::numeric OR min(v.volume_15min) <= 5::numeric) OR avg(v.volume_15min) = 0::numeric WITH DATA;
gabrielwol commented 11 months ago

I also created a repeatable example to show how this situation can occur using CREATE OR REPLACE VIEW ... AS:

-- Create the employees table
DROP TABLE IF EXISTS gwolofs.employees CASCADE;
CREATE TABLE gwolofs.employees (
    employee_id serial PRIMARY KEY,
    employee_name VARCHAR(50),
    department VARCHAR(50)
);

-- Insert some initial data
INSERT INTO gwolofs.employees (employee_name, department)
VALUES
    ('Alice', 'HR'),
    ('Bob', 'IT'),
    ('Bob', 'Sales');

DROP VIEW IF EXISTS gwolofs.it_employees CASCADE;
CREATE VIEW gwolofs.it_employees AS
SELECT * FROM gwolofs.employees WHERE department = 'IT';

DROP MATERIALIZED VIEW IF EXISTS gwolofs.it_employees_copy;
CREATE MATERIALIZED VIEW gwolofs.it_employees_copy AS
SELECT * FROM gwolofs.it_employees NATURAL JOIN gwolofs.employees;

--replace view with one that is recursively self-referential
CREATE OR REPLACE VIEW gwolofs.it_employees AS
SELECT * FROM gwolofs.it_employees_copy;

SELECT obj_schema, obj_name, obj_type, depth
FROM (
    WITH RECURSIVE recursive_deps(obj_schema, obj_name, obj_type, depth) AS
    (
        SELECT
            'gwolofs'::varchar COLLATE "C",
            'employees'::varchar COLLATE "C",
            null::varchar,
            0
        UNION
        SELECT
            dep_schema::varchar,
            dep_name::varchar,
            dep_type::varchar,
            recursive_deps.depth + 1
        FROM
            (
                SELECT
                    ref_nsp.nspname AS ref_schema,
                    ref_cl.relname AS ref_name,
                    rwr_cl.relkind AS dep_type,
                    rwr_nsp.nspname AS dep_schema,
                    rwr_cl.relname AS dep_name
                FROM pg_depend dep
                JOIN pg_class ref_cl ON dep.refobjid = ref_cl.oid
                JOIN pg_namespace ref_nsp ON ref_cl.relnamespace = ref_nsp.oid
                JOIN pg_rewrite rwr ON dep.objid = rwr.oid
                JOIN pg_class rwr_cl ON rwr.ev_class = rwr_cl.oid
                JOIN pg_namespace rwr_nsp ON rwr_cl.relnamespace = rwr_nsp.oid
                WHERE
                    dep.deptype = 'n'
                    AND dep.classid = 'pg_rewrite'::regclass
            ) AS deps
        JOIN recursive_deps ON
            deps.ref_schema = recursive_deps.obj_schema
            AND deps.ref_name = recursive_deps.obj_name
        WHERE
            --remove this and it will run infinitely
            depth < 10
            AND (
                deps.ref_schema != deps.dep_schema
                OR deps.ref_name != deps.dep_name)
            )
    SELECT obj_schema, obj_name, obj_type, depth
    FROM recursive_deps
    WHERE depth > 0
) t
chmnata commented 11 months ago

noting islam's suggestion here, we could add a logic for: skip view A when view A has been logged as a previous view's dependency.

gabrielwol commented 11 months ago

I couldn't figure out a way to include this in the current recursive query:

ERROR:  recursive reference to query "recursive_deps" must not appear within an outer join
LINE 64:         LEFT JOIN recursive_deps USING (obj_schema, obj_name...

I think we will have to restructure as a while loop.