Closed NickZachary closed 2 months ago
import pandas as pd import requests from urllib.parse import urlencode import io # Import io module for BytesIO handling from google.colab import files
def reverse_geocode(lat, lon, timeout=10):
    """Fetch reverse-geocoding data for one coordinate pair.

    Sends a GET request to the geocoder's ``/api/v1/reverse`` endpoint with
    the coordinates passed as the ``point.lat`` / ``point.lon`` query
    parameters.

    Args:
        lat: Latitude of the point to look up.
        lon: Longitude of the point to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails, the server answers with a status other than 200, or the body
        is not valid JSON.
    """
    base_url = "https://geocoder.alpha.phac.gc.ca/api/v1/reverse?"
    params = {"point.lat": lat, "point.lon": lon}
    url = base_url + urlencode(params)

    try:
        # requests applies no timeout by default, so a stalled server would
        # otherwise block the caller forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 answer, so one bad lookup cannot abort a whole batch.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # The body was not valid JSON.
        return None
def prioritize_features(features):
    """Select the best-ranked feature and flatten it into a result record.

    Features are ranked by ``properties['confidence']`` (highest first);
    ties are broken by ``properties['distance']`` (smallest first). When
    several features rank equally, the one that appears first wins.

    Args:
        features: Sequence of feature dicts, each expected to carry a
            ``'properties'`` dict and a ``'geometry'`` dict with
            ``'coordinates'``.

    Returns:
        A dict with ``'coordinates'`` plus the selected feature's address
        properties (missing properties become ``''``), or ``None`` when
        ``features`` is empty or ``None``.
    """
    if not features:
        return None

    def rank(feature):
        props = feature.get('properties') or {}
        confidence = props.get('confidence')
        distance = props.get('distance')
        # A feature without a confidence ranks lowest and one without a
        # distance ranks farthest, instead of raising KeyError.
        return (
            -(confidence if confidence is not None else 0),
            distance if distance is not None else float('inf'),
        )

    # min() returns the first minimal element, which is the same feature a
    # stable sort would place first, without sorting the whole list.
    top_feature = min(features, key=rank)

    properties = top_feature.get('properties') or {}
    geometry = top_feature.get('geometry') or {}

    # The order of these names fixes the column order of the result record.
    property_names = (
        'name',
        'housenumber',
        'street',
        'confidence',
        'distance',
        'accuracy',
        'country',
        'region',
        'region_a',
        'county',
        'county_gid',
        'locality',
        'neighbourhood',
        'label',
    )
    result = {'coordinates': geometry.get('coordinates')}
    result.update((name, properties.get(name, '')) for name in property_names)
    return result
def process_csv(input_file, output_file):
    """Reverse-geocode every row of a CSV and save the best match per row.

    The input is read with pandas and must provide the ``ddLat``, ``ddLong``
    and ``inputID`` columns. Each row with usable coordinates is sent to
    ``reverse_geocode``; the returned features are reduced to one record by
    ``prioritize_features``, tagged with the row's ``inputID`` and collected.
    Rows with missing or non-numeric coordinates, failed lookups, or no
    features are skipped.

    Args:
        input_file: Name of a file held in the module-level ``uploaded``
            mapping (the Colab upload flow), or otherwise anything
            ``pd.read_csv`` accepts, such as a path.
        output_file: Destination path of the CSV that is written.
    """
    # In the Colab flow the file content lives in the module-level
    # ``uploaded`` mapping (file name -> bytes). Fall back to treating
    # ``input_file`` as a path so the function also works without it.
    uploads = globals().get('uploaded')
    if isinstance(uploads, dict) and isinstance(input_file, str) and input_file in uploads:
        source = io.BytesIO(uploads[input_file])
    else:
        source = input_file
    df = pd.read_csv(source)

    results = []
    for _, row in df.iterrows():
        if pd.isna(row['ddLat']) or pd.isna(row['ddLong']):
            continue
        try:
            lat = float(row['ddLat'])
            lon = float(row['ddLong'])
        except (TypeError, ValueError):
            # A non-numeric coordinate must not abort the batch: the output
            # is only written after the loop, so a crash loses everything.
            continue

        geocoding_data = reverse_geocode(lat, lon)
        if not geocoding_data:
            continue

        result = prioritize_features(geocoding_data.get('features', []))
        if result:
            # Assuming 'inputID' is the primary key in input CSV
            result['inputID'] = row['inputID']
            results.append(result)

    # Create output DataFrame and save to CSV
    pd.DataFrame(results).to_csv(output_file, index=False)
# Ask for the input file through the Colab upload widget. ``uploaded`` has to
# stay a module-level name because process_csv() reads the file content from it.
print("Upload the input CSV file:")
uploaded = files.upload()
if not uploaded:
    # With nothing uploaded, next() below would fail with a bare
    # StopIteration; stop with a readable message instead.
    raise SystemExit("No file was uploaded; nothing to process.")
input_csv = next(iter(uploaded))  # name of the first uploaded file

print("Please enter the name of the output CSV file (without extension):")
output_file_name = input().strip()  # Ensure no leading/trailing spaces

output_dir = '/content'
output_csv = f"{output_dir}/{output_file_name}.csv"

process_csv(input_csv, output_csv)
print(f"Output CSV file saved as {output_csv}")
import pandas as pd import requests from urllib.parse import urlencode
# Function to fetch reverse geocoding data
def reverse_geocode(lat, lon, timeout=10):
    """Fetch reverse-geocoding data for one coordinate pair.

    Sends a GET request to the geocoder's ``/api/v1/reverse`` endpoint with
    the coordinates passed as the ``point.lat`` / ``point.lon`` query
    parameters.

    Args:
        lat: Latitude of the point to look up.
        lon: Longitude of the point to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails, the server answers with a status other than 200, or the body
        is not valid JSON.
    """
    base_url = "https://geocoder.alpha.phac.gc.ca/api/v1/reverse?"
    params = {"point.lat": lat, "point.lon": lon}
    url = base_url + urlencode(params)

    try:
        # requests applies no timeout by default, so a stalled server would
        # otherwise block the caller forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 answer, so one bad lookup cannot abort a whole batch.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # The body was not valid JSON.
        return None
# Function to prioritize and rank features
def prioritize_features(features):
    """Select the best-ranked feature and flatten it into a result record.

    The listing of this function stopped after the empty-input guard, so it
    returned ``None`` for every input. The ranking and extraction steps are
    restored here: features are ranked by ``properties['confidence']``
    (highest first), with ties broken by ``properties['distance']``
    (smallest first).

    Args:
        features: Sequence of feature dicts, each expected to carry a
            ``'properties'`` dict and a ``'geometry'`` dict with
            ``'coordinates'``.

    Returns:
        A dict with ``'coordinates'`` plus the selected feature's address
        properties (missing properties become ``''``), or ``None`` when
        ``features`` is empty or ``None``.
    """
    if not features:
        return None

    def rank(feature):
        props = feature.get('properties') or {}
        confidence = props.get('confidence')
        distance = props.get('distance')
        # A feature without a confidence ranks lowest and one without a
        # distance ranks farthest, instead of raising KeyError.
        return (
            -(confidence if confidence is not None else 0),
            distance if distance is not None else float('inf'),
        )

    # min() returns the first minimal element, i.e. the feature a stable
    # sort on the same key would place first.
    top_feature = min(features, key=rank)

    properties = top_feature.get('properties') or {}
    geometry = top_feature.get('geometry') or {}

    # The order of these names fixes the column order of the result record.
    property_names = (
        'name',
        'housenumber',
        'street',
        'confidence',
        'distance',
        'accuracy',
        'country',
        'region',
        'region_a',
        'county',
        'county_gid',
        'locality',
        'neighbourhood',
        'label',
    )
    result = {'coordinates': geometry.get('coordinates')}
    result.update((name, properties.get(name, '')) for name in property_names)
    return result
# Function to process input CSV and generate output CSV
def process_csv(input_file, output_file):
    """Reverse-geocode every row of a CSV and save the best match per row.

    The listing of this function stopped after ``results = []``, so nothing
    was geocoded and ``output_file`` was never written. The row loop and the
    CSV write are restored here.

    The input must provide the ``ddLat``, ``ddLong`` and ``inputID`` columns.
    Each row with usable coordinates is sent to ``reverse_geocode``; the
    returned features are reduced to one record by ``prioritize_features``,
    tagged with the row's ``inputID`` and collected. Rows with missing or
    non-numeric coordinates, failed lookups, or no features are skipped.

    Args:
        input_file: Path (or anything else ``pd.read_csv`` accepts) of the
            CSV to read.
        output_file: Destination path of the CSV that is written.
    """
    df = pd.read_csv(input_file)

    results = []
    for _, row in df.iterrows():
        if pd.isna(row['ddLat']) or pd.isna(row['ddLong']):
            continue
        try:
            lat = float(row['ddLat'])
            lon = float(row['ddLong'])
        except (TypeError, ValueError):
            # A non-numeric coordinate must not abort the batch: the output
            # is only written after the loop, so a crash loses everything.
            continue

        geocoding_data = reverse_geocode(lat, lon)
        if not geocoding_data:
            continue

        result = prioritize_features(geocoding_data.get('features', []))
        if result:
            # Assuming 'inputID' is the primary key in input CSV
            result['inputID'] = row['inputID']
            results.append(result)

    # Create output DataFrame and save to CSV
    pd.DataFrame(results).to_csv(output_file, index=False)
# Prompt user for the path of the input CSV file
# input() reads a typed path here; process_csv() hands it to pd.read_csv().
print("Please upload the input CSV file:")
input_csv = input().strip()  # stray whitespace would make the path invalid

# Prompt user for the name of the output CSV file; it is always written
# under /content.
print("Please enter the name of the output CSV file (without extension):")
output_file_name = input().strip()  # keep stray whitespace out of the file name
output_csv = f"/content/{output_file_name}.csv"  # Assuming using Google Colab

# Process the CSV file
process_csv(input_csv, output_csv)
print(f"Output CSV file saved as {output_csv}")