Open stucka opened 2 months ago
I have messaged a state employee to try to figure out what's going on. If I download the file myself I see Kelpr but not NBCUniversal, though both show up on the website.
Illinois' scraper may predate the platform for Job Center, which might make a rebuild considerably easier and smarter. ... if the data shows up through there.
This doesn't look like other Job Center sites. However, the JSON offers far more fields including lat/long and is stupidly easy to parse.
Illinois has fixed its data.
However, if there's another reason to distrust the Excel function or we want the other fields, I have code to process it (which should be adapted to use cache and util functions):
"""Fetch Illinois WARN layoff records from the IEBS public search API and dump them to CSV."""
import csv

import requests

# Payload mirrors the site's default search; the wide lastDateReported window
# (1990-2042) and take=250 pull every layoff on record in one request.
# NOTE: this is the raw JSON body the site sends -- keep it byte-identical.
payload = """{"direction":"-1","search":"","page":0,"dateReportedStart":null,"dateReportedEnd":null,"lastDateReportedStart":"1990-01-01T17:00:00.000Z","lastDateReportedEnd":"2042-12-31T17:00:00.000Z","dateRangeOpt":"-1","resultsView":1,"layoffTypes":[],"statuses":[4],"reasons":[],"eventCauses":[],"naicsCodes":"1","hasWarnNotice":0,"industries":[],"naics":[],"cities":[],"counties":[],"lwias":[],"edrs":[],"users":[],"accessList":[],"distance":".5","lat":0,"lng":0,"searchFound":false,"locationName":"","locationAddress":"","searchControl":"","trade":"0","unionsInvolved":"0","bookmark":false,"showAdvance":false,"mapMode":"1","showLabels":"0","showLayoffMarkers":true,"markerSet":"1","geoLocation":"1","memberType":"1","graphDisplay":"1","mapCenterLat":40.1331,"mapCenterLng":-89.3985,"zoom":7,"take":250,"totalElements":4513,"totalPages":0,"pageNumber":0,"skip":0,"column":"LastReportDate","columnSort":-1,"initialLoad":false,"monthRangeOpt":7,"yearRangeOpt":2024}"""
post_url = "https://apps.illinoisworknet.com/iebs/api/public/searchWarn"
headers = {
    "Content-Type": "application/json",
}

# timeout: fail fast instead of hanging forever on a stalled server.
r = requests.post(post_url, data=payload, headers=headers, timeout=60)
r.raise_for_status()  # surface HTTP errors rather than json-parsing an error page
local_json = r.json()

layoffs = local_json["Layoffs"]

# Individual records can omit fields, so take the union of keys across all
# entries; sort for a deterministic column order across runs (a bare set
# iterates in an arbitrary, hash-seed-dependent order).
masterheaders = set()
for entry in layoffs:
    masterheaders.update(entry)
fieldnames = sorted(masterheaders)

# Diagnostic: report each field a record is missing (same output as before).
for entry in layoffs:
    for item in fieldnames:
        if item not in entry:
            print(f"Missing {item}")

with open("il-json.csv", "w", newline="", encoding="utf-8") as outfile:
    # DictWriter aligns each record to the header row; restval="" renders
    # missing fields as empty cells, exactly as writing None did.
    writer = csv.DictWriter(outfile, fieldnames=fieldnames, restval="")
    writer.writeheader()
    writer.writerows(layoffs)
The scraper does not appear to be downloading the newest data. The newest data it picks up is from July 30; the Kelpr and NBCUniversal entries from July 31 are not being captured.