NYCPlanning / db-safegraph

SafeGraph ETL
4 stars 1 forks source link

Request: daily county level, US counties -> NYC counties -- Dara #26

Closed SPTKL closed 4 years ago

SPTKL commented 4 years ago
WITH dataset AS (
  SELECT date_start, origin_census_block_group, device_count, map_keys(a) as cbg, a from (
     SELECT origin_census_block_group, device_count,
            CAST(SUBSTR(date_range_start, 1, 10) AS DATE) as date_start,
            CAST(json_parse(destination_cbgs) AS  map<varchar, varchar>) as a 
     FROM social_distancing) b
 ) 
 SELECT 
     date_start,
     SUBSTR(origin_census_block_group, 1, 5) as origin_fips,
     SUBSTR(desti_cbgs, 1, 5) as destination_fips, 
     SUM(CAST(a[desti_cbgs] as SMALLINT)) as visits,
     SUM(CAST(device_count as INTEGER)) as device_counts
 FROM dataset
 CROSS JOIN unnest(cbg) t(desti_cbgs)
 WHERE SUBSTR(desti_cbgs, 1, 5) IN ('36085','36081','36061','36047','36005')
 GROUP BY date_start, SUBSTR(origin_census_block_group, 1, 5), SUBSTR(desti_cbgs, 1, 5)

completed and delivered query id: 105291d9-29da-4718-9d33-6c1dc5024699

SPTKL commented 4 years ago

[10:53 AM] Dara Goldberg (DCP) Hey Baiyue, I was wondering if you might please run another SafeGraph query for me (smile), this time for NYC resident outflows?

So a daily O-D table by NYC origin borough for all U.S. counties destinations: -device_count -completely_home_device_count

-count of origin_cbg==destination_cbg -trip count at all U.S. destination counties (fips 5-digit)

WITH dataset AS (
    SELECT 
        date_start, 
        origin_census_block_group, 
        map_keys(a) as cbg, a 
    FROM (
        SELECT 
            origin_census_block_group,
            CAST(SUBSTR(date_range_start, 1, 10) AS DATE) as date_start,
            CAST(json_parse(destination_cbgs) AS  map<varchar, varchar>) as a 
        FROM social_distancing
        WHERE SUBSTR(origin_census_block_group, 1, 5) IN 
            ('36085','36081','36061','36047','36005')
     ) b
 ), trips as (
    SELECT 
        date_start,
        SUBSTR(origin_census_block_group, 1, 5) as origin_fips,
        SUBSTR(desti_cbgs, 1, 5) as destination_fips, 
        SUM(CAST(a[desti_cbgs] as SMALLINT)) as visits,
        SUM((
            CASE WHEN origin_census_block_group=desti_cbgs 
            THEN CAST(a[desti_cbgs] as SMALLINT) 
            ELSE 0 END
        )) as at_home_visits
    FROM dataset
    CROSS JOIN unnest(cbg) t(desti_cbgs)
    GROUP BY date_start, SUBSTR(origin_census_block_group, 1, 5), SUBSTR(desti_cbgs, 1, 5)
 ), devices as (
     SELECT 
        CAST(SUBSTR(date_range_start, 1, 10) AS DATE) as date_start,
        SUBSTR(origin_census_block_group, 1, 5) as origin_fips,
        SUM(CAST(device_count AS INTEGER)) as device_count,
        SUM(CAST(completely_home_device_count AS INTEGER)) as completely_home_device_count
    FROM social_distancing
    WHERE SUBSTR(origin_census_block_group, 1, 5) IN 
        ('36085','36081','36061','36047','36005')
    GROUP BY date_range_start, SUBSTR(origin_census_block_group, 1, 5)
 ) SELECT 
    a.date_start,
    a.origin_fips,
    a.destination_fips,
    a.visits,
    a.at_home_visits,
    b.device_count,
    b.completely_home_device_count
FROM trips a, devices b 
WHERE a.date_start=b.date_start AND a.origin_fips=b.origin_fips
SPTKL commented 4 years ago

done