Open Bergam0t opened 1 month ago
HM's code:
import pandas as pd
import numpy as np
import geopandas as gpd
import requests
import routingpy as rp
import time
def postcode_lookup_builder(dataset):
    """Build a one-row-per-postcode lookup of patient and destination coordinates.

    Args:
        dataset: DataFrame containing at least the columns ``PTPostcode``,
            ``PTLongitude``, ``PTLatitude``, ``DestLongitude`` and
            ``DestLatitude``.

    Returns:
        A new DataFrame holding only those five columns, with the first row
        encountered for each ``PTPostcode`` kept and later duplicates
        dropped. Surviving rows keep their original index labels.

    Raises:
        KeyError: If any of the required columns is missing from ``dataset``.
    """
    lookup_columns = [
        'PTPostcode',
        'PTLongitude',
        'PTLatitude',
        'DestLongitude',
        'DestLatitude',
    ]
    # Only the postcode decides uniqueness: if one postcode appears with
    # several coordinate pairs, the first occurrence wins.
    return dataset[lookup_columns].drop_duplicates(
        subset='PTPostcode',
        keep='first',
    )
def travel_lookup_builder(postcode_list, apikey):
    """Query OpenRouteService for car travel times/distances and save them as CSV.

    The postcodes are processed in batches of 50 rows. For each batch a
    single matrix request is made in which the batch's patient coordinates
    are the sources and the batch's destination coordinates are the
    destinations. For every patient postcode the smallest value in its matrix
    row is kept, i.e. the nearest of the destinations that were sent in the
    same batch.

    Two files are written to the current working directory:

    * ``postcode_time_lookup.csv`` - column ``time_mins_car`` (duration / 60)
    * ``postcode_dist_lookup.csv`` - column ``dist_kms`` (distance / 1000)

    Args:
        postcode_list: Table (presumably the DataFrame produced by
            ``postcode_lookup_builder`` - confirm against caller) that
            supports row slicing and has the columns ``PTPostcode``,
            ``PTLongitude``, ``PTLatitude``, ``DestLongitude`` and
            ``DestLatitude``.
        apikey: OpenRouteService API key passed to ``rp.ORS``.

    Returns:
        None. The results are only written to disk.

    Raises:
        ValueError: If ``postcode_list`` is empty, because ``pd.concat`` is
            then given nothing to concatenate.
    """
    batch_size = 50
    ors_api = rp.ORS(api_key=apikey)

    duration_frames = []  # one DataFrame of travel times per batch
    distance_frames = []  # one DataFrame of travel distances per batch

    n_batches = int(np.ceil(len(postcode_list) / batch_size))
    for batch_number in range(n_batches):
        start = batch_number * batch_size
        # Slice ends are exclusive, so the end is start + batch_size; the
        # previous "- 1" silently dropped the last row of every batch.
        batch = postcode_list[start:start + batch_size]

        source_coord_pairs = list(
            zip(batch['PTLongitude'], batch['PTLatitude'])
        )
        destination_coord_pairs = list(
            zip(batch['DestLongitude'], batch['DestLatitude'])
        )

        # The matrix call takes one flat list of locations plus index lists
        # saying which entries are sources and which are destinations.
        all_coordinates = source_coord_pairs + destination_coord_pairs
        source_indices = list(range(len(source_coord_pairs)))
        destination_indices = list(
            range(len(source_coord_pairs), len(all_coordinates))
        )

        travel_matrix = ors_api.matrix(
            locations=all_coordinates,
            profile='driving-car',
            sources=source_indices,
            destinations=destination_indices,
            metrics=["duration", "distance"],
        )

        duration_frames.append(
            pd.DataFrame(travel_matrix.durations, index=batch['PTPostcode'])
        )
        distance_frames.append(
            pd.DataFrame(travel_matrix.distances, index=batch['PTPostcode'])
        )

        # Sleep for 5 seconds so as not to exceed the ORS request rate.
        time.sleep(5)

    # Shortest duration per postcode; dividing by 60 assumes ORS reports
    # seconds (TODO confirm against the routingpy documentation).
    all_durations = pd.concat(duration_frames, ignore_index=False)
    time_mins_car = (all_durations.min(axis=1) / 60).rename('time_mins_car')
    # Save as a csv file so the full process need not be re-run every time.
    time_mins_car.to_csv('postcode_time_lookup.csv')

    # Shortest distance per postcode; dividing by 1000 assumes ORS reports
    # metres (TODO confirm against the routingpy documentation).
    all_distances = pd.concat(distance_frames, ignore_index=False)
    dist_kms = (all_distances.min(axis=1) / 1000).rename('dist_kms')
    dist_kms.to_csv('postcode_dist_lookup.csv')
def lsoa_lookup_builder(postcode_list):
    """Look up LSOA and NHS health-authority codes for postcodes and save as CSV.

    The postcodes are sent to the postcodes.io bulk endpoint 100 at a time.
    The JSON ``result`` of every response is flattened with
    ``pd.json_normalize`` (nested keys joined with ``_``), the batches are
    concatenated, and the columns ``query``, ``result_codes_lsoa`` and
    ``result_nhs_ha`` are written to ``postcode_lsoa_lookup.csv`` in the
    current working directory.

    Args:
        postcode_list: Sliceable sequence of postcode strings (a list, or a
            pandas Series / array, which is converted to a list per batch).

    Returns:
        None. The lookup is only written to disk.

    Raises:
        requests.HTTPError: If postcodes.io answers a batch with an error
            status.
        ValueError: If ``postcode_list`` is empty, because ``pd.concat`` is
            then given nothing to concatenate.
    """
    batch_size = 100
    responses = []

    n_batches = int(np.ceil(len(postcode_list) / batch_size))
    for batch_number in range(n_batches):
        start = batch_number * batch_size
        stop = start + batch_size
        # Slice ends are exclusive, so [start:stop] yields the full batch;
        # the previous "stop - 1" silently dropped every 100th postcode.
        # list() keeps the payload JSON-serialisable for Series/array input.
        batch = list(postcode_list[start:stop])
        print(f"Collecting postcodes {start} to {stop - 1}")

        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept to preserve existing behaviour (presumably required by
        # the network this runs on) - confirm whether it can be removed.
        response = requests.post(
            "https://api.postcodes.io/postcodes",
            json={"postcodes": batch},
            verify=False,
        )
        # Fail loudly here instead of with a KeyError on 'result' later.
        response.raise_for_status()
        responses.append(response)

    # One DataFrame per batch of (up to) 100 results.
    results_dfs = [
        pd.json_normalize(response.json()['result'], sep='_')
        for response in responses
    ]

    # Join all batches into a single DataFrame and keep the lookup columns.
    postcode_tolsoa_lookup = pd.concat(results_dfs)
    postcode_tolsoa_lookup = postcode_tolsoa_lookup[
        ['query', 'result_codes_lsoa', 'result_nhs_ha']
    ]
    postcode_tolsoa_lookup.to_csv('postcode_lsoa_lookup.csv')
Awaiting code snippet