PHACDataHub / pelias-canada

https://geocoder.alpha.phac.gc.ca/
0 stars 1 forks source link

create Python reverse geocoding script #111

Closed NickZachary closed 2 months ago

NickZachary commented 2 months ago

import pandas as pd import requests from urllib.parse import urlencode

Function to fetch reverse geocoding data

def reverse_geocode(lat, lon, timeout=10):
    """Fetch reverse geocoding data for a point from the Pelias reverse endpoint.

    Args:
        lat: Latitude of the point, sent as the ``point.lat`` query parameter.
        lon: Longitude of the point, sent as the ``point.lon`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response body, or ``None`` when the request fails,
        times out, returns a non-200 status, or the body is not valid JSON.
    """
    base_url = "https://geocoder.alpha.phac.gc.ca/api/v1/reverse"
    params = {"point.lat": lat, "point.lon": lon}

    try:
        # Without a timeout, requests waits indefinitely on a stalled server,
        # which would hang an entire batch run on a single row.
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 status: no data for this point.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON carries no usable data.
        return None

Function to prioritize and rank features

# Property names copied from the top-ranked feature into the result, in the
# order they appear as output columns.
_RESULT_PROPERTY_FIELDS = (
    'name',
    'housenumber',
    'street',
    'confidence',
    'distance',
    'accuracy',
    'country',
    'region',
    'region_a',
    'county',
    'county_gid',
    'locality',
    'neighbourhood',
    'label',
)


def prioritize_features(features):
    """Pick the best feature from a list and flatten it into a result dict.

    Features are ranked by ``confidence`` (highest first), then by
    ``distance`` (smallest first). A feature lacking ``confidence`` is ranked
    as confidence 0 and one lacking ``distance`` as infinitely far, so
    incomplete features sort last instead of raising ``KeyError``.

    Args:
        features: List of feature dicts, each with a ``properties`` dict and a
            ``geometry`` dict containing ``coordinates``.

    Returns:
        A dict with ``coordinates`` plus the selected properties (missing
        properties become ``''``), or ``None`` if ``features`` is empty.
    """
    if not features:
        return None

    def rank(feature):
        props = feature['properties']
        return (-props.get('confidence', 0), props.get('distance', float('inf')))

    # min() returns the first best-ranked feature, which is the same element
    # a stable sort would place at index 0.
    top_feature = min(features, key=rank)
    properties = top_feature['properties']

    result = {'coordinates': top_feature['geometry']['coordinates']}
    for field in _RESULT_PROPERTY_FIELDS:
        result[field] = properties.get(field, '')
    return result

Function to process input CSV and generate output CSV

def process_csv(input_file, output_file):
    """Reverse geocode every row of an input CSV and write the results to a CSV.

    The input must contain the columns ``inputID``, ``ddLat`` and ``ddLong``.
    Rows with a missing latitude or longitude, rows whose lookup returns no
    data, and rows with no features are left out of the output.

    Args:
        input_file: Path (or anything ``pandas.read_csv`` accepts) of the
            input CSV.
        output_file: Path the output CSV is written to.

    Raises:
        KeyError: If a required column is missing from the input CSV.
    """
    df = pd.read_csv(input_file)

    # Fail before any network call if the input does not have the expected
    # columns, and say which ones are missing.
    required_columns = ('inputID', 'ddLat', 'ddLong')
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(
            f"Input CSV is missing required column(s): {', '.join(missing)}"
        )

    results = []
    # Iterate the columns directly rather than with iterrows(): iterrows()
    # builds one Series per row and upcasts mixed numeric columns to a common
    # dtype, which turns an integer inputID into a float (1 -> 1.0).
    for input_id, raw_lat, raw_lon in zip(df['inputID'], df['ddLat'], df['ddLong']):
        if pd.isna(raw_lat) or pd.isna(raw_lon):
            continue

        geocoding_data = reverse_geocode(float(raw_lat), float(raw_lon))
        if not geocoding_data:
            continue

        result = prioritize_features(geocoding_data.get('features', []))
        if not result:
            continue

        result['inputID'] = input_id  # 'inputID' is the primary key in the input CSV
        results.append(result)

    # Create output DataFrame and save to CSV
    output_df = pd.DataFrame(results)
    output_df.to_csv(output_file, index=False)

Prompt user for the path to the input CSV file

# Ask for the input CSV. The value is read from standard input and handed to
# process_csv as the file to read, so the prompt asks for a path rather than
# an upload. Surrounding whitespace is stripped so a stray space does not
# produce a "file not found" error.
print("Please enter the path to the input CSV file:")
input_csv = input().strip()

# Prompt user for the name of the output CSV file
print("Please enter the name of the output CSV file (without extension):")
output_file_name = input().strip()
output_csv = f"/content/{output_file_name}.csv"  # Assuming using Google Colab

# Process the CSV file
process_csv(input_csv, output_csv)

print(f"Output CSV file saved as {output_csv}")

NickZachary commented 2 months ago

pelias_reversegeocoding_batch_v1.zip

NickZachary commented 2 months ago
  1. The code above requires the input CSV to have: (a) a header row with field names; (b) a primary-key field named "inputID" (e.g., a row ID); (c) a field named "ddLat" (containing latitude in decimal degrees, WGS 1984); and (d) a field named "ddLong" (containing longitude in decimal degrees, WGS 1984).
NickZachary commented 2 months ago

batch_reverseGeocoder_Pelias_v2.zip testRgeo.csv

NickZachary commented 2 months ago

import pandas as pd import requests from urllib.parse import urlencode import io # Import io module for BytesIO handling from google.colab import files

Function to fetch reverse geocoding data

def reverse_geocode(lat, lon, timeout=10):
    """Fetch reverse geocoding data for a point from the Pelias reverse endpoint.

    Args:
        lat: Latitude of the point, sent as the ``point.lat`` query parameter.
        lon: Longitude of the point, sent as the ``point.lon`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response body, or ``None`` when the request fails,
        times out, returns a non-200 status, or the body is not valid JSON.
    """
    base_url = "https://geocoder.alpha.phac.gc.ca/api/v1/reverse"
    params = {"point.lat": lat, "point.lon": lon}

    try:
        # Without a timeout, requests waits indefinitely on a stalled server,
        # which would hang an entire batch run on a single row.
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 status: no data for this point.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON carries no usable data.
        return None

Function to prioritize and rank features

# Property names copied from the top-ranked feature into the result, in the
# order they appear as output columns.
_RESULT_PROPERTY_FIELDS = (
    'name',
    'housenumber',
    'street',
    'confidence',
    'distance',
    'accuracy',
    'country',
    'region',
    'region_a',
    'county',
    'county_gid',
    'locality',
    'neighbourhood',
    'label',
)


def prioritize_features(features):
    """Pick the best feature from a list and flatten it into a result dict.

    Features are ranked by ``confidence`` (highest first), then by
    ``distance`` (smallest first). A feature lacking ``confidence`` is ranked
    as confidence 0 and one lacking ``distance`` as infinitely far, so
    incomplete features sort last instead of raising ``KeyError``.

    Args:
        features: List of feature dicts, each with a ``properties`` dict and a
            ``geometry`` dict containing ``coordinates``.

    Returns:
        A dict with ``coordinates`` plus the selected properties (missing
        properties become ``''``), or ``None`` if ``features`` is empty.
    """
    if not features:
        return None

    def rank(feature):
        props = feature['properties']
        return (-props.get('confidence', 0), props.get('distance', float('inf')))

    # min() returns the first best-ranked feature, which is the same element
    # a stable sort would place at index 0.
    top_feature = min(features, key=rank)
    properties = top_feature['properties']

    result = {'coordinates': top_feature['geometry']['coordinates']}
    for field in _RESULT_PROPERTY_FIELDS:
        result[field] = properties.get(field, '')
    return result

Function to process input CSV and generate output CSV

def process_csv(input_file, output_file, uploaded_files=None):
    """Reverse geocode every row of an uploaded CSV and write the results to a CSV.

    The input must contain the columns ``inputID``, ``ddLat`` and ``ddLong``.
    Rows with a missing latitude or longitude, rows whose lookup returns no
    data, and rows with no features are left out of the output.

    Args:
        input_file: Key of the input CSV in the uploads mapping (the script
            uses the first key of the ``files.upload()`` result).
        output_file: Path the output CSV is written to.
        uploaded_files: Mapping from key to the raw bytes of an uploaded file.
            Defaults to the module-level ``uploaded`` mapping, so existing
            two-argument calls behave as before.

    Raises:
        KeyError: If ``input_file`` is not in the uploads mapping, or a
            required column is missing from the input CSV.
    """
    if uploaded_files is None:
        # Backward-compatible default: read from the module-level mapping
        # filled in by the upload step of the script.
        uploaded_files = uploaded

    df = pd.read_csv(io.BytesIO(uploaded_files[input_file]))

    # Fail before any network call if the input does not have the expected
    # columns, and say which ones are missing.
    required_columns = ('inputID', 'ddLat', 'ddLong')
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(
            f"Input CSV is missing required column(s): {', '.join(missing)}"
        )

    results = []
    # Iterate the columns directly rather than with iterrows(): iterrows()
    # builds one Series per row and upcasts mixed numeric columns to a common
    # dtype, which turns an integer inputID into a float (1 -> 1.0).
    for input_id, raw_lat, raw_lon in zip(df['inputID'], df['ddLat'], df['ddLong']):
        if pd.isna(raw_lat) or pd.isna(raw_lon):
            continue

        geocoding_data = reverse_geocode(float(raw_lat), float(raw_lon))
        if not geocoding_data:
            continue

        result = prioritize_features(geocoding_data.get('features', []))
        if not result:
            continue

        result['inputID'] = input_id  # 'inputID' is the primary key in the input CSV
        results.append(result)

    # Create output DataFrame and save to CSV
    output_df = pd.DataFrame(results)
    output_df.to_csv(output_file, index=False)

Upload the input CSV file

# Upload the input CSV file
print("Upload the input CSV file:")
uploaded = files.upload()

# Stop with a clear message when nothing was uploaded (for example, the
# upload was cancelled); otherwise next() below raises a bare StopIteration.
if not uploaded:
    raise SystemExit("No file was uploaded; nothing to process.")

# Use the first uploaded file, whatever its name is
input_csv = next(iter(uploaded))

# Prompt user to enter the name of the output CSV file (without extension)
print("Please enter the name of the output CSV file (without extension):")
output_file_name = input().strip()  # Ensure no leading/trailing spaces

# Write the output under the /content directory
output_dir = '/content'
output_csv = f"{output_dir}/{output_file_name}.csv"

# Process the CSV file
process_csv(input_csv, output_csv)

print(f"Output CSV file saved as {output_csv}")

NickZachary commented 1 month ago

reverse_geocoder_python_v3.zip