Open AmandaDoyle opened 4 years ago
Confirmed that this problem is for several dates, and also applies to borough aggregations. For example:
-- Check for differences between sector and subsector visits by boro
WITH daily_visits AS(
SELECT safegraph_place_id, poi_cbg, date_add('day', row_number() over(partition by safegraph_place_id), date_start) AS date_current, CAST(visits AS SMALLINT) as visits
FROM (
SELECT
safegraph_place_id,
poi_cbg,
CAST(SUBSTR(date_range_start, 1, 10) AS DATE) as date_start,
CAST(SUBSTR(date_range_end, 1, 10) AS DATE) as date_end,
cast(json_parse(visits_by_day) as array<varchar>) as a
FROM safegraph.weekly_patterns
WHERE SUBSTR(poi_cbg,1,5) IN ('36085','36081','36061','36047','36005')
AND CAST('2020-10-01' AS DATE) < dt
AND CAST('2020-11-01' AS DATE) > dt
) b
CROSS JOIN UNNEST(a) as t(visits)
),
sectors AS(
SELECT
a.date_current as date,
(CASE WHEN SUBSTR(a.poi_cbg,1,5) = '36005' THEN 'BX'
WHEN SUBSTR(a.poi_cbg,1,5) = '36047' THEN 'BK'
WHEN SUBSTR(a.poi_cbg,1,5) = '36061' THEN 'MN'
WHEN SUBSTR(a.poi_cbg,1,5) = '36081' THEN 'QN'
WHEN SUBSTR(a.poi_cbg,1,5) = '36085' THEN 'SI'
END) as borough,
(CASE WHEN SUBSTR(a.poi_cbg,1,5) = '36005' THEN 2
WHEN SUBSTR(a.poi_cbg,1,5) = '36047' THEN 3
WHEN SUBSTR(a.poi_cbg,1,5) = '36061' THEN 1
WHEN SUBSTR(a.poi_cbg,1,5) = '36081' THEN 4
WHEN SUBSTR(a.poi_cbg,1,5) = '36085' THEN 5
END) as borocode,
SUBSTR(a.poi_cbg,1,5) as fips_county,
SUBSTR(b.naics_code,1,2) as sector,
SUM(a.visits) as total_visits
FROM daily_visits a
LEFT JOIN (
SELECT distinct safegraph_place_id, naics_code
FROM "safegraph"."core_poi"
WHERE region = 'NY' AND dt = CAST('2020-10-01' AS DATE)
) b
ON a.safegraph_place_id=b.safegraph_place_id
GROUP BY a.date_current,
SUBSTR(a.poi_cbg,1,5),
SUBSTR(b.naics_code,1,2)
ORDER BY date, borocode),
subsectors AS(
SELECT
a.date_current as date,
(CASE WHEN SUBSTR(a.poi_cbg,1,5) = '36005' THEN 'BX'
WHEN SUBSTR(a.poi_cbg,1,5) = '36047' THEN 'BK'
WHEN SUBSTR(a.poi_cbg,1,5) = '36061' THEN 'MN'
WHEN SUBSTR(a.poi_cbg,1,5) = '36081' THEN 'QN'
WHEN SUBSTR(a.poi_cbg,1,5) = '36085' THEN 'SI'
END) as borough,
(CASE WHEN SUBSTR(a.poi_cbg,1,5) = '36005' THEN 2
WHEN SUBSTR(a.poi_cbg,1,5) = '36047' THEN 3
WHEN SUBSTR(a.poi_cbg,1,5) = '36061' THEN 1
WHEN SUBSTR(a.poi_cbg,1,5) = '36081' THEN 4
WHEN SUBSTR(a.poi_cbg,1,5) = '36085' THEN 5
END) as borocode,
SUBSTR(a.poi_cbg,1,5) as fips_county,
SUBSTR(b.naics_code,1,3) as subsector,
SUM(a.visits) as total_visits
FROM daily_visits a
LEFT JOIN (
SELECT distinct safegraph_place_id, naics_code
FROM "safegraph"."core_poi"
WHERE region = 'NY' AND dt = CAST('2020-10-01' AS DATE)
) b
ON a.safegraph_place_id=b.safegraph_place_id
GROUP BY a.date_current,
SUBSTR(a.poi_cbg,1,5),
SUBSTR(b.naics_code,1,3)
ORDER BY date, borocode)
SELECT a.borough, a.date, a.sector, a.total_visits as sector_visits, SUM(b.total_visits) as sum_subsector_visits,
ABS(SUM(b.total_visits) - a.total_visits) as diff
FROM sectors a
JOIN subsectors b
ON a.sector = SUBSTR(b.subsector,1,2)
AND a.date = b.date
AND a.borough = b.borough
GROUP BY a.borough, a.date, a.sector, a.total_visits
ORDER BY diff DESC
Example: For 6/30/20, the sector file contains 179 visits for sector 72, ZIP code 10001 in MN. The subsector file contains 1992 visits for subsector 722 for that ZIP code.