Refine `osm_check` function to default to searching for `salvador allende` instead of just `allende`

I think I did it:
Also consider using a generator object instead of creating a separate list (`lrl_placeholder`), if I can figure out how to make it work.
Even better, I was able to rewrite the function so that it doesn't have to rely on a second list or generator (reference).
#
# OpenStreetMap checker of LOCALE_1
# note that the locale_1 collection process differs between single-locale and multi-locale links
#
def osm_check(locale_1, data):
    """Cross-check the article's locale against OpenStreetMap and record locale_1.

    Searches OpenStreetMap for 'Salvador Allende <locale_1> <country_en>' and,
    only if that returns nothing, for 'Allende <locale_1> <country_en>'. Each
    search result is shown to the user, who confirms or rejects it.

    Exactly one value is appended to data['locale_1'] per call:
      * the third-from-last part of the confirmed OpenStreetMap address, or
      * the article-derived locale_1 when there are no results, no result is
        confirmed, or the confirmed address has fewer than three parts.

    Args:
        locale_1: locale derived from the article; used in the search query
            and as the fallback value.
        data: mapping whose 'locale_1' entry supports .append().

    Side effects:
        Sets the module-level globals osm_address and osm_info. After a
        confirmed match, osm_info is the result's HTML as a string and
        osm_address is the list of address parts; otherwise both are ''.
        Also drives the module-level `driver` and prompts on stdin.
    """
    # both names are module-level state, so declare them once for the whole function
    global osm_address, osm_info

    #
    # when we have this abacq locale, we then cross-check this with OpenStreetMap
    # first, do a specific search for 'Salvador Allende';
    # if that has no results, try a more general search for 'Allende'
    #
    locale_results_list = _osm_search(f'Salvador Allende {locale_1} {country_en}')
    if not locale_results_list:
        locale_results_list = _osm_search(f'Allende {locale_1} {country_en}')

    if not locale_results_list:
        print('No addresses found in OpenStreetMap. Will use the locale derived from the article...')
        data['locale_1'].append(locale_1)
        print(f'Locale 1: {locale_1}')
        # clear the previous entry's osm_address and osm_info so that it doesn't get copied into the current entry
        osm_address = ''
        osm_info = ''
        return

    #
    # a single result looks like this - we can derive lots of info from here once user verifies that it looks good
    #
    # <a class="set_position" data-lat="-12.1102763" data-lon="-77.0104283"
    # data-min-lat="-12.1103037" data-max-lat="-12.1102452" data-min-lon="-77.0109212" data-max-lon="-77.0097999"
    # data-prefix="Residential Road" data-name="Salvador Allende, Villa Victoria, Surquillo, Province of Lima, Lima Metropolitan Area, Lima, 15000, Peru"
    # data-type="way" data-id="426845566" href="/way/426845566">Salvador Allende, Villa Victoria, Surquillo, Province of Lima, Lima Metropolitan Area, Lima, 15000, Peru</a>
    #
    print(f'{len(locale_results_list)} possible address(es) found in OpenStreetMap.')

    #
    # go through each search result and have the user verify it
    #
    confirmed = False
    for result in locale_results_list:
        result = str(result)
        address_match = re.search(r'>\"*(.*)\"*<\/a>', result)
        if address_match is None:
            # markup we cannot read an address from; skip it instead of crashing
            print('Skipping an OpenStreetMap result whose address could not be read.')
            continue
        osm_address = address_match.group(1)
        print(f'Please verify if this address matches the place in this article:\n{osm_address}')
        user_verification = input('>>> Type y if yes, n if no: ')
        # typo prevention
        while user_verification not in ('y', 'n'):
            user_verification = input('>>> Try again - Type y if yes, n if no: ')
        if user_verification == 'y':
            # we'll save the whole result in a variable for later parsing. we can then close the loop.
            osm_info = result
            confirmed = True
            break

    if not confirmed:
        if len(locale_results_list) == 1:
            print('OpenStreetMap address does not match the place in this article. Will use the locale derived from the article...')
        else:
            print('All OpenStreetMap addresses do not match the place in this article. Will use the locale derived from the article...')
        # clear the previous entry's osm_address and osm_info so that it doesn't get copied into the current entry
        osm_address = ''
        osm_info = ''
        # nothing else we can do but add the default locale_1
        data['locale_1'].append(locale_1)
        print(f'Locale 1: {locale_1}')

    #
    # stay in the web page like a normal human would
    #
    humanizer(timer)

    if not confirmed:
        # locale_1 was already recorded above; leave the cleared globals untouched
        return

    #
    # when we have osm_info, we'll take locale details from its osm_address by splitting it.
    # sample split:
    # ['Salvador Allende', 'Villa Victoria', 'Surquillo', 'Province of Lima', 'Lima Metropolitan Area', 'Lima', '15000', 'Peru']
    # index 0 is the place's name, -1 is the country, -2 is the zip code, -3 is locale_1, etc...
    #
    osm_address = osm_address.split(', ')
    if len(osm_address) >= 3:
        locale_1 = osm_address[-3]
    else:
        # too few parts to locate locale_1; keep the article's locale so that
        # data['locale_1'] still receives exactly one value for this entry
        print('OpenStreetMap address is too short to derive a locale. Will use the locale derived from the article...')
    data['locale_1'].append(locale_1)
    print(f'Locale 1: {locale_1}')


def _osm_search(query):
    """Load the OpenStreetMap search page for `query` and return its result anchors as a list."""
    from urllib.parse import quote

    # percent-encode the free text so characters such as '&' or '#' in a locale
    # name cannot break the query string
    locale_link = f'https://www.openstreetmap.org/search?query={quote(query, safe="")}'
    driver.get(locale_link)
    # humanizer fixes the problem of the script sometimes getting no OSM info
    # even though the results are visible in the browser
    humanizer(timer)
    osm_soup = BeautifulSoup(
        driver.page_source,
        'html.parser',
        parse_only=SoupStrainer("ul", class_="results-list list-group list-group-flush"),
    )
    return list(osm_soup.find_all("a", class_="set_position"))
jamieglohere / a-place-for-salvador-allende

Refine `osm_check` function to default to searching for `salvador allende` instead of just `allende` #8