I tested the initial load with 1 million staging rows. It completed in a bit less than 4 minutes, which is far slower than expected (only around 4,200 rows per second).
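For reference, staging rows of that size can be produced synthetically. The following generator is hypothetical; the column names are taken from the statement below, the value distribution is made up:

-- hypothetical generator for 1 million D_STA$ test rows (values are made up)
INSERT INTO D_STA$ (vt_start, vt_end, is_deleted$, deptno, dname)
SELECT DATE '2010-01-01' + MOD(level, 2000)      AS vt_start,
       DATE '2010-01-01' + MOD(level, 2000) + 10 AS vt_end,
       NULL                                      AS is_deleted$,
       MOD(level, 50000)                         AS deptno,
       'DEPT ' || MOD(level, 50000)              AS dname
  FROM dual
CONNECT BY level <= 1000000;
COMMIT;

The statement under test: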
INSERT ALL
WHEN 1=1 THEN
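   -- every row of the query goes into the history table d_ht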
INTO d_ht (
vt_start,
vt_end,
is_deleted$,
deptno,
dname
)
VALUES (
vt_start,
vt_end,
is_deleted$,
deptno,
dname
)
LOG ERRORS INTO D_LOG$('d_api.init_load from D_STA$ started at 2016-09-24 11:54:26.201261') REJECT LIMIT 0
WHEN vt_end IS NULL THEN
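   -- rows with an open-ended period (current rows) additionally go into table d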
INTO d (
is_deleted$,
deptno,
dname
)
VALUES (
is_deleted$,
deptno,
dname
)
LOG ERRORS INTO D_LOG$('d_api.init_load from D_STA$ started at 2016-09-24 11:54:26.201261') REJECT LIMIT 0
WITH
active AS (
-- truncate period columns if granularity is different to the data type default
SELECT TRUNC(vt_start, 'DDD') AS vt_start,
TRUNC(vt_end, 'DDD') AS vt_end,
is_deleted$,
deptno,
dname
FROM D_STA$
WHERE is_deleted$ IS NULL
),
valid AS (
-- filter invalid periods, e.g. produced by truncation
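      -- NULL bounds denote open-ended periods and are therefore kept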
SELECT vt_start,
vt_end,
is_deleted$,
deptno,
dname
FROM active
     WHERE vt_start < vt_end
        OR (vt_start IS NULL AND vt_end IS NOT NULL)
        OR (vt_start IS NOT NULL AND vt_end IS NULL)
        OR (vt_start IS NULL AND vt_end IS NULL)
),
merged AS (
-- merge periods with identical column values into a single row
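      -- LAG/LEAD look across period boundaries per deptno: gap_start$ is the
      -- previous period's end, gap_end$ the next period's start; the combined
      -- subquery below derives deleted (gap) periods from them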
SELECT vt_start,
LAG (vt_end, 1, NULL) OVER (PARTITION BY deptno ORDER BY vt_start NULLS FIRST) AS gap_start$,
vt_end,
LEAD (vt_start, 1, NULL) OVER (PARTITION BY deptno ORDER BY vt_start NULLS FIRST) AS gap_end$,
is_deleted$,
deptno,
dname
FROM valid
MATCH_RECOGNIZE (
PARTITION BY is_deleted$, deptno, dname
ORDER BY vt_start NULLS FIRST
MEASURES FIRST(vt_start) AS vt_start, LAST(vt_end) AS vt_end
ONE ROW PER MATCH
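         -- a match is a chain of seamless periods: each row starts exactly
         -- where the previous row ended (see DEFINE), collapsed into one row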
PATTERN ( strt nxt* )
DEFINE nxt AS vt_start = PREV(vt_end)
)
),
combined AS (
-- active periods
SELECT vt_start,
vt_end,
NULL AS is_deleted$,
deptno,
dname
FROM merged
UNION ALL
-- deleted start periods
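   -- the first period per deptno (no predecessor, so gap_start$ IS NULL) that has
   -- a concrete start: the leading gap from the beginning of time becomes a deleted period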
SELECT gap_start$ AS vt_start,
       vt_start AS vt_end,
1 AS is_deleted$,
deptno,
dname
FROM merged
WHERE vt_start IS NOT NULL AND gap_start$ IS NULL
UNION ALL
-- deleted non-starting periods
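   -- gaps between consecutive periods of a deptno, as well as the trailing gap
   -- after the last period, become deleted periods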
SELECT vt_end AS vt_start,
       gap_end$ AS vt_end,
1 AS is_deleted$,
deptno,
dname
FROM merged
 WHERE vt_end != gap_end$
    OR (vt_end IS NULL AND gap_end$ IS NOT NULL)
    OR (vt_end IS NOT NULL AND gap_end$ IS NULL)
)
-- main
SELECT vt_start,
vt_end,
is_deleted$,
deptno,
dname
FROM combined
With the help of Real-Time SQL Monitor the problem became clear (screenshots attached).
It seems that the INSERT is executed row by row: over 93% of the time is spent in the INSERT part, so the SELECT part is not the problem.
The statement needs optimisation. If that is not achievable with a multi-table insert, then two separate INSERTs are an option, even if the query part has to be executed twice.
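A minimal sketch of that fallback, assuming the WITH clause from above is repeated verbatim in both statements (abbreviated below as "..."; the error-log tags are illustrative):

-- sketch: two conventional inserts instead of the multi-table insert;
-- each "..." stands for the unchanged subquery text from above
INSERT INTO d_ht (vt_start, vt_end, is_deleted$, deptno, dname)
WITH active AS (...), valid AS (...), merged AS (...), combined AS (...)
SELECT vt_start, vt_end, is_deleted$, deptno, dname
  FROM combined
LOG ERRORS INTO D_LOG$('d_api.init_load, d_ht part') REJECT LIMIT 0;

INSERT INTO d (is_deleted$, deptno, dname)
WITH active AS (...), valid AS (...), merged AS (...), combined AS (...)
SELECT is_deleted$, deptno, dname
  FROM combined
 WHERE vt_end IS NULL
LOG ERRORS INTO D_LOG$('d_api.init_load, d part') REJECT LIMIT 0;

Whether the two conventional inserts then avoid the row-by-row processing would have to be verified with SQL Monitor again.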