Open elnappo opened 5 years ago
I run vklookup.info and thought I'd share a quick snippet of how I ETL the Australian call sign data into our database. Hopefully this helps you add Australia. Good luck on the project!
import urllib.request
from io import BytesIO, TextIOWrapper
import zipfile
import csv
import boto3
from datetime import datetime
URL="http://web.acma.gov.au/rrl-updates/spectra_rrl.zip"
SERVICE="6" #based on licence_service.csv
TABLE="australiancallsigns"
db = boto3.client('sdb')
def chunks(l, n):
"""Yield successive n-sized chunks from l."""
for i in range(0, len(l), n):
yield l[i:i + n]
def handler(event, context):
clients={}
licenses={}
existing_calls={}
batch=[]
with urllib.request.urlopen(URL) as rrl_http:
with zipfile.ZipFile(BytesIO(rrl_http.read())) as rrl_zip:
# grab all the client details first so we can look them up
with rrl_zip.open("client.csv") as clients_file:
clients_reader = csv.DictReader(TextIOWrapper(clients_file))
for client in clients_reader:
clients[client["CLIENT_NO"]] = client
with rrl_zip.open("licence.csv") as license_file:
licenses_reader = csv.DictReader(TextIOWrapper(license_file))
for licensea in licenses_reader:
licenses[licensea["LICENCE_NO"]] = licensea
with rrl_zip.open("device_details.csv") as device_details:
device_details_reader = csv.DictReader(TextIOWrapper(device_details))
for device in device_details_reader:
#if device['SV_ID'] == SERVICE:
try:
licensea = licenses[device["LICENCE_NO"]]
client = clients[licensea["CLIENT_NO"]]
except KeyError:
continue
key = device['CALL_SIGN']
if key == "":
continue
if key in existing_calls:
continue
else:
existing_calls[key] = True
try:
data = {
"Name":key,
"Attributes":[
{
"Name": "name",
"Value": client["LICENCEE"],
"Replace": True
},
{
"Name": "street",
"Value": client["POSTAL_STREET"],
"Replace": True
},
{
"Name": "postcode",
"Value": client["POSTAL_POSTCODE"],
"Replace": True
},
{
"Name": "suburb",
"Value": client["POSTAL_SUBURB"],
"Replace": True
},
{
"Name": "state",
"Value": client["POSTAL_STATE"],
"Replace": True
},
{
"Name": "type",
"Value": licensea["LICENCE_CATEGORY_NAME"],
"Replace": True
},
{
"Name": "license_no",
"Value": licensea["LICENCE_NO"],
"Replace": True
}
,
{
"Name": "date_of_effect",
"Value": licensea["DATE_OF_EFFECT"],
"Replace": True
},
{
"Name": "date_of_expiry",
"Value": licensea["DATE_OF_EXPIRY"],
"Replace": True
},
{
"Name": "status",
"Value": licensea["STATUS_TEXT"],
"Replace": True
}
,
{
"Name": "link",
"Value": "https://web.acma.gov.au/rrl/licence_search.licence_lookup?pLICENCE_NO=" + licensea["LICENCE_NO"],
"Replace": True
}
]
}
#print(data)
batch.append(data)
except:
pass
lastupdate = {
"Name":"lastupdate",
"Attributes":[
{
"Name": "lastupdate",
"Value": datetime.now().isoformat(),
"Replace": True
}
]
}
existing_calls["lastupdate"] = True
batch.append(lastupdate)
dbchunks = list(chunks(batch,25))
for chunk in dbchunks:
db.batch_put_attributes(
DomainName=TABLE,
Items=chunk
)
print("uploading chunk")
print("donedb add, starting cleanup")
delete_batch = []
paginator = db.get_paginator('select')
response_iterator = paginator.paginate(
SelectExpression='select * from ' + TABLE,
ConsistentRead=True
)
for data in response_iterator:
for x in data['Items']:
if x['Name'] not in existing_calls:
delete_batch.append(x)
print("Deleting :" + x['Name'] )
dbdeletechunks = list(chunks(delete_batch,25))
for chunk in dbdeletechunks:
db.batch_delete_attributes(
DomainName=TABLE,
Items=chunk
)
print("deleting chunk")
This may help you with the Australian data: https://github.com/kronicd/rrl_import
If your'e aware of any other source, please add a comment.
Official
Unofficial