hsma-programme / hsma6_geographic_optimisation_and_visualisation_book

A book summarising the content of the geographic optimisation and visualisation module in the HSMA programme **Work In Progress**
https://hsma-programme.github.io/hsma6_geographic_optimisation_and_visualisation_book/
Other
3 stars 1 forks source link

Add HM's code example on looping through/rate limiting requests to routingpy/ORS #4

Open Bergam0t opened 1 month ago

Bergam0t commented 1 month ago

Awaiting code snippet

Bergam0t commented 1 month ago

HM's code:

import pandas as pd
import numpy as np
import geopandas as gpd
import requests
import routingpy as rp
import time

def postcode_lookup_builder(dataset):
    """Build a one-row-per-postcode lookup of patient/destination coordinates.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns ``PTPostcode``, ``PTLongitude``,
        ``PTLatitude``, ``DestLongitude`` and ``DestLatitude``. Any other
        columns are ignored. The input is not modified.

    Returns
    -------
    pandas.DataFrame
        The five columns listed above, keeping only the first row seen for
        each ``PTPostcode``. The original row index labels are preserved.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``dataset``.
    """
    lookup_columns = [
        'PTPostcode',
        'PTLongitude',
        'PTLatitude',
        'DestLongitude',
        'DestLatitude',
    ]

    # Only the postcode decides uniqueness: if the same postcode appears with
    # different coordinates, the first occurrence wins.
    return dataset[lookup_columns].drop_duplicates(
        subset='PTPostcode',
        keep='first',
    )

def travel_lookup_builder(postcode_list, apikey, *, batch_size=50,
                          pause_seconds=5):
    """Query ORS for car travel times/distances and save them as CSV lookups.

    The rows of ``postcode_list`` are sent to the openrouteservice matrix
    endpoint (via ``routingpy``) in batches. For every batch the patient
    coordinates are used as sources and the destination coordinates of the
    same rows as destinations. Each source row is then reduced to the
    minimum value across all destinations returned for its batch.

    NOTE(review): because of that row-wise minimum, the value stored for a
    postcode is the smallest time/distance to *any* destination in the same
    batch, not necessarily to the destination on its own row - confirm this
    is the intended behaviour.

    Two files are written to the current working directory:

    * ``postcode_time_lookup.csv`` - column ``time_mins_car`` (durations / 60)
    * ``postcode_dist_lookup.csv`` - column ``dist_kms`` (distances / 1000)

    Both are indexed by ``PTPostcode``.

    Parameters
    ----------
    postcode_list : pandas.DataFrame
        Must contain ``PTPostcode``, ``PTLongitude``, ``PTLatitude``,
        ``DestLongitude`` and ``DestLatitude``.
    apikey : str
        openrouteservice API key.
    batch_size : int, keyword-only, default 50
        Number of rows sent per matrix request. Each request asks for
        ``batch_size * batch_size`` routes, so this presumably has to stay
        within the matrix size limit of your ORS plan - verify before
        raising it.
    pause_seconds : float, keyword-only, default 5
        Time to sleep after every request so as not to exceed the ORS
        request rate.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1, or (from ``pandas.concat``) if
        ``postcode_list`` is empty.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    ors_api = rp.ORS(api_key=apikey)

    travel_time_frames = []  # one DataFrame of durations per batch
    travel_dist_frames = []  # one DataFrame of distances per batch

    for start in range(0, len(postcode_list), batch_size):
        # Slice ends are exclusive, so ``start + batch_size`` (not ``- 1``)
        # is needed to include the last row of every batch.
        batch = postcode_list.iloc[start:start + batch_size]

        source_coord_pairs = list(
            zip(batch['PTLongitude'], batch['PTLatitude'])
        )
        destination_coord_pairs = list(
            zip(batch['DestLongitude'], batch['DestLatitude'])
        )

        # ORS takes a single list of locations plus index lists saying which
        # entries are sources and which are destinations: sources first,
        # destinations straight after them.
        all_coordinates = source_coord_pairs + destination_coord_pairs
        source_indices = list(range(len(source_coord_pairs)))
        destination_indices = list(
            range(len(source_coord_pairs), len(all_coordinates))
        )

        travel_matrix_time_dist = ors_api.matrix(
            locations=all_coordinates,
            profile='driving-car',
            sources=source_indices,
            destinations=destination_indices,
            metrics=["duration", "distance"],
        )

        travel_time_frames.append(
            pd.DataFrame(
                travel_matrix_time_dist.durations,
                index=batch['PTPostcode'],
            ).rename_axis("Postcode", axis="columns")
        )

        travel_dist_frames.append(
            pd.DataFrame(
                travel_matrix_time_dist.distances,
                index=batch['PTPostcode'],
            ).rename_axis("Postcode", axis="columns")
        )

        # Sleep so as not to exceed the ORS request rate.
        time.sleep(pause_seconds)

    # Final travel time lookup: shortest duration per postcode, in minutes.
    travel_matrix_time_df_final = (
        pd.concat(travel_time_frames, ignore_index=False)
        .min(axis=1)
        .div(60)
        .rename('time_mins_car')
    )
    # Save as csv file so the full process does not need to run every time.
    travel_matrix_time_df_final.to_csv('postcode_time_lookup.csv')

    # Final travel distance lookup: shortest distance per postcode, in km.
    travel_matrix_dist_df_final = (
        pd.concat(travel_dist_frames, ignore_index=False)
        .min(axis=1)
        .div(1000)
        .rename('dist_kms')
    )
    # Save as csv file so the full process does not need to run every time.
    travel_matrix_dist_df_final.to_csv('postcode_dist_lookup.csv')

def lsoa_lookup_builder(postcode_list):
    """Look up LSOA / health authority codes for postcodes via postcodes.io.

    The postcodes are sent to the postcodes.io bulk lookup endpoint 100 at a
    time. The responses are flattened, combined and written to
    ``postcode_lsoa_lookup.csv`` in the current working directory with the
    columns ``query``, ``result_codes_lsoa`` and ``result_nhs_ha``.

    Parameters
    ----------
    postcode_list : sequence of str
        Postcodes to look up. A list or a pandas Series both work; each
        batch is converted to a plain list before being sent as JSON.

    Returns
    -------
    None

    Raises
    ------
    requests.HTTPError
        If postcodes.io answers a batch with an error status.
    requests.Timeout
        If a batch gets no response within 30 seconds.
    ValueError
        From ``pandas.concat`` if ``postcode_list`` is empty.
    KeyError
        If none of the responses contain the expected result columns
        (for example when no postcode could be matched).
    """
    batch_size = 100  # number of postcodes sent per bulk request

    # One dataframe per batch of results.
    results_dfs = []

    for start in range(0, len(postcode_list), batch_size):
        # Slice ends are exclusive, so ``start + batch_size`` (not ``- 1``)
        # is needed to include the last postcode of every batch.
        postcode_batch = list(postcode_list[start:start + batch_size])
        print(f"Collecting postcodes {start} to {start + batch_size - 1}")

        # NOTE(review): verify=False switches off TLS certificate
        # verification. It is kept because removing it could break use
        # behind intercepting proxies - confirm whether it is still needed.
        response = requests.post(
            "https://api.postcodes.io/postcodes",
            json={"postcodes": postcode_batch},
            verify=False,
            timeout=30,
        )
        # Fail loudly on an HTTP error instead of a confusing KeyError later.
        response.raise_for_status()

        results_dfs.append(
            pd.json_normalize(response.json()['result'], sep='_')
        )

    # Join all of the per-batch dataframes into a single dataframe.
    postcode_tolsoa_lookup = pd.concat(results_dfs)

    postcode_tolsoa_lookup = postcode_tolsoa_lookup[
        ['query', 'result_codes_lsoa', 'result_nhs_ha']
    ]

    postcode_tolsoa_lookup.to_csv('postcode_lsoa_lookup.csv')