CityofToronto / bdit_tt_request_app

Internal app for travel time data requests
https://trans-bdit.intra.prod-toronto.ca/traveltime-request/
2 stars 2 forks source link

Consolidate travel time logic in the app #121

Open Nate-Wessel opened 1 month ago

Nate-Wessel commented 1 month ago

I'd like to consolidate all the logic for calculating travel times in one place. Currently the app relies on the congestion network (table congestion_network_daily) which is updated daily as follows: (from here, but fluffed a bit)

WITH speed_links AS (
    SELECT
        segment_id,
        link_dir,
        links.length AS link_length,
        dt,
        extract(hour from tod)::int AS hr,
        harmean(mean) AS spd_avg,
        COUNT(tx)::int AS num_bin
    FROM here.ta
    INNER JOIN congestion.network_links_22_2 links USING (link_dir)
    WHERE
        dt >= _dt
        AND dt < _dt + interval '1 day'
    GROUP BY
        segment_id,
        link_dir,
        dt,
        hr,
        links.length
),

/*
Produces estimates of the average travel time for each 1 hour bin for each
individual segment (segment_id), where at least 80% of the segment (by distance)
has observations at the link (link_dir) level
*/
tt_hr AS (
    SELECT
        segment_id, 
        dt,
        hr,
        -- Adjusted to segment's length
        SUM(link_length / spd_avg  * 3.6 ) * total_length / SUM(link_length) AS tt,
        -- Not adjusted to segment's length, only summing up link_dir tt
        SUM(link_length / spd_avg  * 3.6 ) AS unadjusted_tt,\
        -- Sum of link_dir with data's length
        SUM(link_length) AS length_w_data,
        -- Adjusted tt valid to use if this value is True
        CASE
            WHEN SUM(link_length) >= 0.8 * total_length THEN True 
            ELSE False 
        END AS is_valid,
        sum(num_bin) AS num_bin
    FROM speed_links
    INNER JOIN congestion.network_segments USING (segment_id)
    GROUP BY
        segment_id,
        dt,
        hr,
        total_length
    ORDER BY
        segment_id,
        dt,
        hr
)

/*
Final Output: Inserts an estimate of the segment aggregation into
congestion.network_segments_daily
*/
--INSERT INTO congestion.network_segments_daily
SELECT
    segment_id,
    dt,
    hr,
    round(tt::numeric, 2) AS tt,
    round(unadjusted_tt::numeric, 2) AS unadjusted_tt,
    length_w_data,
    is_valid,
    num_bin
FROM tt_hr
Nate-Wessel commented 1 month ago

A simplified version of the above:

WITH speed_links AS (
    -- average speeds per hour, per link
    -- for links on the congestion network
    SELECT
        links.segment_id,
        ta.link_dir,
        links.length AS link_length,
        ta.dt,
        extract(HOUR FROM ta.tod)::int AS hr,
        harmean(ta.mean) AS spd_avg,
        COUNT(ta.tx)::int AS num_bin
    FROM here.ta
    INNER JOIN congestion.network_links_22_2 AS links USING (link_dir)
    WHERE ta.dt = '2024-01-01'::date
    GROUP BY
        links.segment_id,
        ta.link_dir,
        ta.dt,
        hr,
        links.length
)

/*
Produces estimates of the average travel time for each 1 hour bin for each
individual segment (segment_id), where at least 80% of the segment (by distance)
has observations at the link (link_dir) level
*/
SELECT
    segment_id, 
    speed_links.dt,
    speed_links.hr,
    -- Adjusted to segment's length
    round(
        SUM(speed_links.link_length / speed_links.spd_avg  * 3.6 )
        * segments.total_length
        / SUM(speed_links.link_length),
        2
    ) AS tt,
    -- Not adjusted to segment's length, only summing up link_dir tt
    round(
        SUM(speed_links.link_length / speed_links.spd_avg  * 3.6),
        2
    ) AS unadjusted_tt,
    -- Sum of link_dir with data's length
    SUM(speed_links.link_length) AS length_w_data,
    -- Adjusted tt valid to use if this value is True
    CASE
        WHEN SUM(speed_links.link_length) >= 0.8 * segments.total_length THEN TRUE 
        ELSE FALSE 
    END AS is_valid,
    sum(speed_links.num_bin) AS num_bin
FROM speed_links
INNER JOIN congestion.network_segments AS segments USING (segment_id)
GROUP BY
    segment_id,
    speed_links.dt,
    speed_links.hr,
    segments.total_length
ORDER BY
    segment_id,
    speed_links.dt,
    speed_links.hr