gregrs-uk / python-fhrs-osm

Python tools and Leaflet maps for downloading, comparing and visualising Food Hygiene Rating Scheme (FHRS) and OpenStreetMap data
http://gregrs.dev.openstreetmap.org/fhrs/
GNU General Public License v3.0
8 stars 2 forks source link

Investigate any more possible tags/values to include #64

Closed gregrs-uk closed 5 years ago

gregrs-uk commented 5 years ago

Investigate whether there are any more keys/values that should be included, i.e. ones used on objects that already have fhrs:id tags set.

gregrs-uk commented 5 years ago

Overpass query to get nodes/ways with fhrs:id set

// Fetch the nodes and ways inside the bounding box that have an fhrs:id tag,
// returned as XML.
[out:xml][timeout:120];
(
  // {{bbox}} is a placeholder for the bounding box; presumably substituted by
  // the client (e.g. overpass turbo) before the query is sent - confirm.
  node["fhrs:id"]({{bbox}});
  way["fhrs:id"]({{bbox}});
);
// Tags only: the analysis below reads each element's type, id and tags.
out tags;

R code for analysis

library(xml2)
library(dplyr)
library(purrr)
library(tidyr)
library(forcats)
library(ggplot2)
library(ggstance)

# Build `tidy`: one row per OSM node/way with its type, id and a list-column
# `tags` holding a tibble of that element's k/v pairs. The parsed result is
# cached in fhrs-tidy.rds so the XML is only walked when no cache exists.
if (!file.exists("fhrs-tidy.rds")) {
  x <- read_xml("fhrs.osm")

  tidy <- map_dfr(
    xml_find_all(x, "node|way"),
    function(element) {
      # each <tag> child contributes one row built from its attributes
      tag_attrs <- xml_attrs(xml_find_all(element, "tag"))
      tag_table <- map_dfr(tag_attrs, function(attrs) as_tibble(as.list(attrs)))

      tibble(
        type = xml_name(element),
        id = xml_attr(element, "id"),
        tags = list(tag_table)
      )
    }
  )

  # an element without tags yields a column-less tibble above; swap it for an
  # empty tibble that still has the k and v columns
  tidy <- mutate(
    tidy,
    tags = map(tags, function(tag_table) {
      if (nrow(tag_table) > 0) {
        tag_table
      } else {
        tibble(k = character(), v = character())
      }
    })
  )

  saveRDS(tidy, "fhrs-tidy.rds")
} else {
  tidy <- readRDS("fhrs-tidy.rds")
}

# Wide table of the tag keys of interest: one row per OSM element and one
# column per key, holding that element's value for the key (NA if not tagged).
# The list-column is named explicitly because calling unnest() without columns
# is deprecated since tidyr 1.0.0 and emits a warning.
# NOTE(review): select() errors if one of these keys never occurs in the
# extract (e.g. a very small bounding box) - confirm the extract covers them.
fhrs_spread <- tidy %>%
  unnest(tags) %>%
  spread(k, v) %>%
  select(type, id, amenity, club, craft, shop, tourism)

# Values regarded as already included, grouped by tag key. The plots use these
# vectors to mark each observed value as currently included or not.
filter_list <- list(
  # amenity=*
  amenity = c(
    "bar", "cafe", "care_home", "childcare", "church_hall",
    "cinema", "college", "community_centre", "community_hall", "fast_food",
    "fuel", "hospital", "kindergarten", "nightclub", "nursing_home",
    "pharmacy", "place_of_worship", "post_office", "pub", "restaurant",
    "school", "social_club", "social_facility", "theatre", "village_hall"
  ),
  # club=*
  club = c(
    "scouts", "social", "sport"
  ),
  # craft=*
  craft = c(
    "brewery", "caterer", "confectionery", "distillery", "winery"
  ),
  # shop=*
  shop = c(
    "alcohol", "bakery", "butcher", "cheese", "chemist",
    "confectionery", "convenience", "deli", "delicatessen", "discount",
    "farm", "fishmonger", "greengrocer", "grocery", "health_food",
    "newsagent", "pastry", "supermarket", "variety_store"
  ),
  # tourism=*
  tourism = c(
    "hotel", "guest_house"
  )
)

# Horizontal bar chart (log-scaled counts) of how often each value of one tag
# key occurs, with bars filled by whether the value is already included.
#
# Arguments:
#   key                  unquoted column name in `data`, e.g. amenity
#   relevant_filter_list character vector of values currently included for
#                        that key
#   data                 data frame with one column per key; defaults to the
#                        script-level `fhrs_spread`, which the function
#                        previously read implicitly
#   min_count            values occurring fewer times than this are left out
#                        of the chart (previously hard-coded as 10)
#
# Returns the ggplot object; called at top level it is printed automatically.
plot_key_values <- function(key, relevant_filter_list,
                            data = fhrs_spread, min_count = 10) {
  key <- enquo(key)
  data %>%
    group_by(!!key) %>%
    count() %>%
    ungroup() %>%
    # parenthesised so the unquote binds before %in% without relying on
    # rlang's re-ordering of operator precedence for `!!`
    mutate(currently_included = (!!key) %in% relevant_filter_list) %>%
    filter(!is.na(!!key), n >= min_count) %>%
    # order the bars by frequency; done after filtering so that only the
    # remaining values become factor levels
    mutate(!!key := fct_reorder(!!key, n)) %>%
    ggplot(aes(n, !!key, fill = currently_included)) +
    geom_colh() +
    scale_x_log10()
}

# One chart per key, each compared against that key's entry in filter_list.
# Kept as separate top-level calls so every plot is auto-printed.
plot_key_values(
  key = amenity,
  relevant_filter_list = filter_list[["amenity"]]
)
plot_key_values(
  key = club,
  relevant_filter_list = filter_list[["club"]]
)
plot_key_values(
  key = craft,
  relevant_filter_list = filter_list[["craft"]]
)
plot_key_values(
  key = shop,
  relevant_filter_list = filter_list[["shop"]]
)
plot_key_values(
  key = tourism,
  relevant_filter_list = filter_list[["tourism"]]
)
gregrs-uk commented 5 years ago

Possible candidates:

amenity=ice_cream, shop=seafood

gregrs-uk commented 5 years ago

This is a much simpler way, using Overpass to get a TSV file. (Had to zip HTML file to allow upload to GitHub.)

fhrs.nb.html.zip

gregrs-uk commented 5 years ago

Dev server updated