elnappo / project-novis

OAuth provider, DB, API and more for amateur radio
https://www.project-novis.org
GNU Affero General Public License v3.0
6 stars 3 forks source link

Import call sings from more sources #7

Open elnappo opened 5 years ago

elnappo commented 5 years ago

If your'e aware of any other source, please add a comment.

Official

Unofficial

TheSkorm commented 4 years ago

I run vklookup.info and thought I'd share a quick snippet of how I ETL the Australian call sign data into our database. Hopefully this helps you add Australia. Good luck on the project!

import urllib.request
from io import BytesIO, TextIOWrapper
import zipfile
import csv
import boto3
from datetime import datetime

URL="http://web.acma.gov.au/rrl-updates/spectra_rrl.zip"
SERVICE="6" #based on licence_service.csv
TABLE="australiancallsigns"

db = boto3.client('sdb')

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

def handler(event, context):
    clients={}
    licenses={}
    existing_calls={}
    batch=[]

    with urllib.request.urlopen(URL) as rrl_http:
        with zipfile.ZipFile(BytesIO(rrl_http.read())) as rrl_zip:
            # grab all the client details first so we can look them up
            with rrl_zip.open("client.csv") as clients_file:
                clients_reader = csv.DictReader(TextIOWrapper(clients_file))
                for client in clients_reader:
                    clients[client["CLIENT_NO"]] = client
            with rrl_zip.open("licence.csv") as license_file:
                licenses_reader = csv.DictReader(TextIOWrapper(license_file))
                for licensea in licenses_reader:
                    licenses[licensea["LICENCE_NO"]] = licensea
            with rrl_zip.open("device_details.csv") as device_details:
                device_details_reader = csv.DictReader(TextIOWrapper(device_details))
                for device in device_details_reader:
                    #if device['SV_ID'] == SERVICE:
                        try:
                            licensea = licenses[device["LICENCE_NO"]]
                            client = clients[licensea["CLIENT_NO"]]
                        except KeyError:
                            continue
                        key = device['CALL_SIGN']
                        if key == "":
                            continue
                        if key in existing_calls:
                            continue
                        else:
                            existing_calls[key] = True
                        try:
                            data = {
                                "Name":key,
                                "Attributes":[
                                    {
                                        "Name": "name",
                                        "Value": client["LICENCEE"],
                                        "Replace": True
                                    },
                                    {
                                        "Name": "street",
                                        "Value": client["POSTAL_STREET"],
                                        "Replace": True
                                    },
                                    {
                                        "Name": "postcode",
                                        "Value": client["POSTAL_POSTCODE"],
                                        "Replace": True
                                    },
                                    {
                                        "Name": "suburb",
                                        "Value": client["POSTAL_SUBURB"],
                                        "Replace": True
                                    },
                                    {
                                        "Name": "state",
                                        "Value": client["POSTAL_STATE"],
                                        "Replace": True
                                    },
                                    {
                                        "Name": "type",
                                        "Value": licensea["LICENCE_CATEGORY_NAME"],
                                        "Replace": True
                                    },
                                    {
                                        "Name": "license_no",
                                        "Value": licensea["LICENCE_NO"],
                                        "Replace": True
                                    }
                                    ,
                                    {
                                        "Name": "date_of_effect",
                                        "Value": licensea["DATE_OF_EFFECT"],
                                        "Replace": True
                                    },
                                    {
                                        "Name": "date_of_expiry",
                                        "Value": licensea["DATE_OF_EXPIRY"],
                                        "Replace": True
                                    },
                                    {
                                        "Name": "status",
                                        "Value": licensea["STATUS_TEXT"],
                                        "Replace": True
                                    }
                                    ,
                                    {
                                        "Name": "link",
                                        "Value": "https://web.acma.gov.au/rrl/licence_search.licence_lookup?pLICENCE_NO=" + licensea["LICENCE_NO"],
                                        "Replace": True
                                    }
                                ]
                            }
                            #print(data)
                            batch.append(data)
                        except:
                            pass
        lastupdate = {
            "Name":"lastupdate",
            "Attributes":[
                {
                    "Name": "lastupdate",
                    "Value": datetime.now().isoformat(),
                    "Replace": True
                }

            ]
        }
        existing_calls["lastupdate"] = True
        batch.append(lastupdate)
        dbchunks = list(chunks(batch,25))
        for chunk in dbchunks:
            db.batch_put_attributes(
                DomainName=TABLE,
                Items=chunk
            )
            print("uploading chunk")
        print("donedb add, starting cleanup")

        delete_batch = []
        paginator = db.get_paginator('select')
        response_iterator = paginator.paginate(
            SelectExpression='select * from ' + TABLE,
            ConsistentRead=True
        )
        for data in response_iterator:
            for x in data['Items']:
                if x['Name'] not in existing_calls:
                    delete_batch.append(x)
                    print("Deleting :" + x['Name'] )

        dbdeletechunks = list(chunks(delete_batch,25))
        for chunk in dbdeletechunks:
            db.batch_delete_attributes(
                DomainName=TABLE,
                Items=chunk
            )
            print("deleting chunk")
kronicd commented 2 years ago

This may help you with the Australian data: https://github.com/kronicd/rrl_import