Closed carllcchen closed 3 years ago
import json from datetime import datetime from pytz import timezone from urllib.request import urlopen import sys import csv from io import StringIO
def load_json():
    """Fetch the Utah COVID-19 LTC facility-impacts layer from ArcGIS.

    Returns:
        dict: the parsed JSON response (a FeatureServer query result with a
        "features" list).

    Exits the process with status 1 when the HTTP request does not return 200,
    so automation can detect the failure.
    """
    # NOTE: stray spaces inside the original URL string were removed; some
    # urllib versions reject URLs with surrounding whitespace.
    url = ("https://services6.arcgis.com/KaHXE9OkiB9e63uE/ArcGIS/rest/services/COVID19_Long_Term_Care_Facility_Impacts/FeatureServer/273/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=true&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token=")
    response = urlopen(url)
    status_code = response.getcode()
    if status_code != 200:
        # str() is required here: getcode() returns an int, and the original
        # "..." + status_code raised TypeError instead of printing the error.
        print("URL request failed with status code: " + str(status_code))
        sys.exit(1)
    # Return the parsed payload so callers can actually use it (the original
    # assigned a local and returned nothing).
    return json.loads(response.read())
# Output column order expected by downstream consumers. The "blankN" columns
# are intentional placeholders; the "Postive" misspelling is kept on purpose —
# it is the header name the downstream pipeline expects.
FIELDNAMES = [
    "Date Collected", "State", "blank1",
    "blank2", "Facility_Name", "Facility_Type", "blank3",
    "blank4", "blank5", "blank6", "blank7", "blank8",
    "Resolved_Y_N", "blank9", "Postive_Patients_Desc", "blank10", "blank11",
    "blank12", "blank13", "blank14", "blank15", "blank16",
    "Unresolved_Postive_Patients_Desc", "blank17", "blank18", "blank19",
    "blank20", "blank21", "blank22", "blank23", "Dashboard_Display",
]


def build_csv(features_list, date_collected=None):
    """Render the ArcGIS feature list as a CSV string.

    Args:
        features_list: the "features" list from the ArcGIS query response;
            each item is expected to carry an "attributes" dict with the
            Facility_Name / Facility_Type / Resolved_Y_N /
            Postive_Patients_Desc / Dashboard_Display keys.
        date_collected: value for the "Date Collected" column; defaults to
            today's date (US/Eastern) as YYYYMMDD.

    Returns:
        str: CSV text including a header row.
    """
    if date_collected is None:
        date_collected = datetime.now(timezone('US/Eastern')).strftime('%Y%m%d')

    # Build each row as a dict and let csv.DictWriter handle quoting/escaping.
    # The previous approach concatenated a JSON string by hand and re-parsed
    # it per row, which broke on any quote, backslash, or comma in the data.
    buffer = StringIO()
    writer = csv.DictWriter(buffer, FIELDNAMES)
    writer.writeheader()
    for item in features_list:
        attrs = item["attributes"]
        positive_desc = attrs["Postive_Patients_Desc"]
        resolved = attrs["Resolved_Y_N"]

        row = {name: "" for name in FIELDNAMES}  # all blanks default to ""
        row.update({
            "Date Collected": date_collected,
            "State": "UT",
            "Facility_Name": attrs["Facility_Name"],
            "Facility_Type": attrs["Facility_Type"],
            "Resolved_Y_N": resolved,
            # "No Resident Cases" is the source's way of reporting zero.
            "Postive_Patients_Desc": (
                "0" if positive_desc == "No Resident Cases" else positive_desc
            ),
            # Unresolved column carries the raw description only while the
            # outbreak is still open (Resolved_Y_N == "N").
            "Unresolved_Postive_Patients_Desc": (
                positive_desc if resolved == "N" else ""
            ),
            "Dashboard_Display": attrs["Dashboard_Display"],
        })
        writer.writerow(row)
    return buffer.getvalue()


if __name__ == "__main__":
    # Print to STDOUT (not a file) so GH Actions automation can capture it.
    dict_data = load_json()
    if dict_data is not None:
        print(build_csv(dict_data["features"]))
On Tue, Jan 12, 2021 at 2:56 PM Zach Lipton notifications@github.com wrote:
@zachlipton requested changes on this pull request.
Thanks for this! A few comments inline. Sorry if this seems like a lot or if anything is unclear. By all means shoot me a message if anything doesn't make sense or you're not sure how to do something and I'll do my best to help.
In data-collection-scripts/ltc-scraper/Utah_Data_Extract.py https://github.com/COVID19Tracking/covid-tracking-data/pull/220#discussion_r556134879 :
@@ -0,0 +1,47 @@ +import json +from datetime import datetime +from pytz import timezone +from urllib.request import urlopen + +def load_json():
- url = ("https://services6.arcgis.com/KaHXE9OkiB9e63uE/ArcGIS/rest/services/COVID19_Long_Term_Care_Facility_Impacts/FeatureServer/273/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=true&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token=")
- response = urlopen(url)
- if response.getcode() == 200:
- source = response.read()
- dict_data = json.loads(source)
- else:
- print("Error")
If this fails for any reason, let's print an error message with the error code and then sys.exit(1) or some other non-zero return code so that the script terminates and automation knows the script failed and can report the error
In data-collection-scripts/ltc-scraper/Utah_Data_Extract.py https://github.com/COVID19Tracking/covid-tracking-data/pull/220#discussion_r556135010 :
+from pytz import timezone +from urllib.request import urlopen + +def load_json():
- url = ("https://services6.arcgis.com/KaHXE9OkiB9e63uE/ArcGIS/rest/services/COVID19_Long_Term_Care_Facility_Impacts/FeatureServer/273/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=true&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token=")
- response = urlopen(url)
- if response.getcode() == 200:
- source = response.read()
- dict_data = json.loads(source)
- else:
- print("Error")
print(response.read())
data = json.loads(response.read())
f = open("Facilities_with_Active_Outbreaks.json", "r")
data = f.read()
Remove unnecessary comments please
In data-collection-scripts/ltc-scraper/Utah_Data_Extract.py https://github.com/COVID19Tracking/covid-tracking-data/pull/220#discussion_r556137965 :
+def load_json():
- url = ("https://services6.arcgis.com/KaHXE9OkiB9e63uE/ArcGIS/rest/services/COVID19_Long_Term_Care_Facility_Impacts/FeatureServer/273/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=true&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token=")
- response = urlopen(url)
- if response.getcode() == 200:
- source = response.read()
- dict_data = json.loads(source)
- else:
- print("Error")
print(response.read())
data = json.loads(response.read())
f = open("Facilities_with_Active_Outbreaks.json", "r")
data = f.read()
- CSV_file = open("CSV_Data_Extract.csv", "w")
- if not dict_data is None:
I don't think load_json() is being called anywhere? So dict_data has no data here. Maybe make load_json() return the dict and then use it here?
In data-collection-scripts/ltc-scraper/Utah_Data_Extract.py https://github.com/COVID19Tracking/covid-tracking-data/pull/220#discussion_r556138494 :
- dict_data = json.loads(source)
- else:
- print("Error")
print(response.read())
data = json.loads(response.read())
f = open("Facilities_with_Active_Outbreaks.json", "r")
data = f.read()
- CSV_file = open("CSV_Data_Extract.csv", "w")
- if not dict_data is None:
- features_list = dict_data["features"]
- for item in features_list:
- fmt = datetime.today().strftime('%Y%m%d')
- now_time = datetime.now(timezone('US/Eastern'))
- line_data = now_time.strftime(fmt)
I think this can be simplified to datetime.now(timezone('US/Eastern')).strftime('%Y%m%d')
In data-collection-scripts/ltc-scraper/Utah_Data_Extract.py https://github.com/COVID19Tracking/covid-tracking-data/pull/220#discussion_r556139318 :
+from urllib.request import urlopen + +def load_json():
- url = ("https://services6.arcgis.com/KaHXE9OkiB9e63uE/ArcGIS/rest/services/COVID19_Long_Term_Care_Facility_Impacts/FeatureServer/273/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=true&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token=")
- response = urlopen(url)
- if response.getcode() == 200:
- source = response.read()
- dict_data = json.loads(source)
- else:
- print("Error")
print(response.read())
data = json.loads(response.read())
f = open("Facilities_with_Active_Outbreaks.json", "r")
data = f.read()
- CSV_file = open("CSV_Data_Extract.csv", "w")
Instead of writing the output to a file, please output it to STDOUT when the script finishes (just a print statement is fine). This will help us hook it up to GH Actions.
In data-collection-scripts/ltc-scraper/Utah_Data_Extract.py https://github.com/COVID19Tracking/covid-tracking-data/pull/220#discussion_r556146380 :
- else:
- print("Error")
print(response.read())
data = json.loads(response.read())
f = open("Facilities_with_Active_Outbreaks.json", "r")
data = f.read()
- CSV_file = open("CSV_Data_Extract.csv", "w")
- if not dict_data is None:
- features_list = dict_data["features"]
- for item in features_list:
- fmt = datetime.today().strftime('%Y%m%d')
- now_time = datetime.now(timezone('US/Eastern'))
- line_data = now_time.strftime(fmt)
- line_data += "," + "UT"
Please don't output CSV data by trying to build it up from scratch this way. This will break the file format if any of the data fields contains a comma, newline, or anything else that needs to be escaped. The easiest thing to do is to transform the data into a Python array-of-dictionaries and use Python's csv library https://realpython.com/python-csv/#writing-csv-file-from-a-dictionary-with-csv to write it out, which will take care of outputting a header with the column names, formatting everything, and escaping the data safely for you. Something more-or-less like (untested):
writer = csv.DictWriter(si, fieldnames=["Date Collected", "State", "blank"]) writer.writeheader() writer.writerow({'Date Collected': '20210112', 'State': 'UT', 'blank': ''}) output = si.getvalue() print(output)
— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub https://github.com/COVID19Tracking/covid-tracking-data/pull/220#pullrequestreview-566750725, or unsubscribe https://github.com/notifications/unsubscribe-auth/AHJYINUQG6D5D2PELYIUYQTSZTHR5ANCNFSM4V72I57A .
Utah Data Extraction from Utah Daily Dashboard