NYCPlanning / db-safegraph

SafeGraph ETL
4 stars 1 forks source link

Ops usecase #73

Closed SPTKL closed 3 years ago

SPTKL commented 3 years ago

63

SPTKL commented 3 years ago

70

mgraber commented 3 years ago

Changes needed:

For example:

WITH 
daily_visits AS(
    SELECT 
        safegraph_place_id, 
        location_name, poi_cbg, visits_total, visitors_total, 
        max_visits_per_day, min_visits_per_day, 
        max_visits_per_hour, min_visits_per_hour,
        date_add('day', row_number() over(partition by safegraph_place_id), date_start) AS date_current, 
        CAST(visits AS SMALLINT) as visits, median_dwell
    FROM (
    SELECT
        safegraph_place_id,
        location_name,
        poi_cbg,
        CAST(SUBSTR(date_range_start, 1, 10) AS DATE) as date_start,
        CAST(SUBSTR(date_range_end, 1, 10) AS DATE) as date_end,
        raw_visitor_counts as visitors_total,
        raw_visit_counts as visits_total,
        array_max(cast(json_parse(visits_by_day) as array<SMALLINT>)) as max_visits_per_day,
        array_min(cast(json_parse(visits_by_day) as array<SMALLINT>)) as min_visits_per_day,
        array_max(cast(json_parse(visits_by_each_hour) as array<SMALLINT>)) as max_visits_per_hour,
        array_min(cast(json_parse(visits_by_each_hour) as array<SMALLINT>)) as min_visits_per_hour,
        cast(json_parse(visits_by_day) as array<SMALLINT>) as a,
        median_dwell
    FROM safegraph.weekly_patterns
    WHERE SUBSTR(poi_cbg,1,5) IN ('36085','36081','36061','36047','36005')
    AND CAST('2020-10-01' AS DATE) < dt
    AND CAST('2020-10-30' AS DATE) > dt
    ) b
    CROSS JOIN UNNEST(a) as t(visits)
)
SELECT
    a.safegraph_place_id,
    CAST(EXTRACT(year_of_week from a.date_current) AS VARCHAR)||'-'||
        LPAD(CAST(EXTRACT(week from a.date_current) AS VARCHAR),2,'0') as year_week,
    a.location_name as poi,
    a.poi_cbg,
    SUM(CASE WHEN EXTRACT(dow from a.date_current) NOT IN (1, 7) THEN visits END) as visits_weekday,
    SUM(CASE WHEN EXTRACT(dow from a.date_current) IN (1, 7) THEN visits END) as visits_weekend,
    SUM(a.visits) as visits_all_days,
    a.visits_total,
    a.visitors_total,
    a.max_visits_per_day,
    a.min_visits_per_day,
    a.max_visits_per_hour,
    a.min_visits_per_hour,
    a.median_dwell
FROM daily_visits a
GROUP BY 
    a.safegraph_place_id, EXTRACT(year_of_week from a.date_current), 
    EXTRACT(week from a.date_current), a.location_name, a.poi_cbg, a.max_visits_per_day,
    a.min_visits_per_day, a.max_visits_per_hour, a.min_visits_per_hour, a.visits_total, a.visitors_total,
    a.median_dwell
ORDER BY year_week, poi_cbg