porimol / countryinfo

A python module for returning data about countries, ISO info and states/provinces within them.
MIT License
140 stars 44 forks source link

capital_latlng #18

Closed DavidBreuer closed 4 years ago

DavidBreuer commented 4 years ago
DavidBreuer commented 4 years ago

Here is the Python script I used (maybe it is helpful for someone), followed by the list of countries for which no coordinates were added:

# import modules
import os
import json
import io
import fileinput
import geopy
import geopy.geocoders

# directory holding the original (unmodified) json files
path = "/media/Volume/Programs/countryinfo/countryinfo/data_old"

# json/dictionary key under which the capital is stored
pos = "capital"
# json/dictionary key under which the capital coordinates are added
key = "capital_latlng"

# geocoder used to look up the capital coordinates
geolocator = geopy.geocoders.Nominatim(user_agent="my-application")

# define auxiliary function
def insert_key_value(dct, pos, key, val):
    """Return a new dict equal to ``dct`` with ``key: val`` placed after ``pos``.

    The input dictionary is not modified; insertion order of the other
    entries is preserved.

    Args:
        dct: Source dictionary.
        pos: Existing key after which the new entry is inserted.
        key: Key of the new entry.
        val: Value of the new entry.

    Returns:
        A new ``dict``. If ``pos`` is not a key of ``dct`` (including the
        case of an empty ``dct``), the new entry is appended at the end. If
        ``key`` already exists in ``dct``, its old entry is dropped so the
        new value and position win.
    """
    new = {}
    for ikey, ival in dct.items():
        # drop a stale entry for ``key`` so it cannot overwrite the new value
        if ikey == key and ikey != pos:
            continue
        new[ikey] = ival
        # insert directly behind the anchor key (works for the last key too)
        if ikey == pos:
            new[key] = val
    # anchor key missing: fall back to appending instead of losing the entry
    if key not in new:
        new[key] = val
    return new

# loop over country jsons: for every json file in `path`, look up the
# coordinates of its capital and write an extended copy of the file into the
# corresponding 'data' location. Files that cannot be processed are skipped
# and reported together with the reason.
# NOTE(review): the public Nominatim service has a usage policy (request rate,
# identifying user agent) -- confirm it is respected before a bulk run.
files = sorted(
    os.path.join(path, name)
    for name in os.listdir(path)
    # only country json files; other files (e.g. index.js) are not json
    if name.endswith('.json')
)
for file in files:

    name = os.path.basename(file)

    try:

        # read json
        with io.open(file, 'r', encoding='utf8') as fil:
            dct = json.load(fil)

        # get capital; nothing to geocode if it is missing or empty
        cap = dct.get(pos)
        if not cap:
            print('{}: no capital given'.format(name))
            continue

        # look up coordinates using geopy; geocode() yields None if the
        # query has no result
        loc = geolocator.geocode(cap)
        if loc is None:
            print('{}: no coordinates found for {!r}'.format(name, cap))
            continue

        val = [round(loc.latitude, 6), round(loc.longitude, 6)]
        new = insert_key_value(dct, pos, key, val)

        # write json without blanks after separators; doing this in the
        # serializer (instead of replacing ', ' and ': ' in the written
        # text) leaves string values that contain those sequences intact
        dump = file.replace('data_old', 'data')
        with io.open(dump, 'w', encoding='utf8') as fil:
            json.dump(new, fil, ensure_ascii=False, separators=(',', ':'))

    except Exception as err:

        # best effort: keep going, but report the skipped file and the reason
        print('{}: {!r}'.format(name, err))

# list of files skipped by the script (no capital coordinates were added)
#antarctica.json
#ashmore_and_cartier_island.json
#burma.json
#clipperton_island.json
#europa_island.json
#gaza_strip.json
#glorioso_islands.json
#heard_island_and_mc_donald_islands.json
#holy_see_vatican_city.json
#hungary.json
#index.js
#ireland_northern.json
#jan_mayen.json
#jarvis_island.json
#johnston_atoll.json
#juan_de_nova_island.json
#macau.json
#midway_islands.json
#netherlands_antilles.json
#scotland.json
#virgin_islands.json
#wales.json
#west_bank.json