marineenergy / api

Application programming interface (API) to expose records (FERC/USACE docs, MarineCadastre spatial, etc) to PRIMRE search engine
https://api.marineenergy.app
MIT License
0 stars 0 forks source link

Speed up Spatial queries in API #3

Closed bbest closed 3 years ago

bbest commented 3 years ago

See #2. Perhaps creating an initial bounding polygon per dataset to test for overlap across all datasets first.

Here's one trick:

Other references:

bbest commented 3 years ago

Here's the function that the Report app uses:

Pulling from SQL in spatial | marineenergy.app - Google Sheets.

bbest commented 3 years ago

Here's a more complete assessment of spatial querying done in the Shiny app and parameterized Rmarkdown report.

Spatial tab of Shiny

server.R#L309-L421:

output$tblSpatial <- renderDT({

    req(vals$queries_lit)

    message("output$tblSpatial")

    if (is.null(crud()$finished)){
      aoi_wkt <- NULL
    } else {
      aoi_wkt <- crud()$finished %>% pull(geometry) %>% st_as_text()
    }

    if (nrow(vals$queries_lit) == 0 || is.null(aoi_wkt)){
      dt_empty <- tibble(
        message = "Please Configure Tags and Locations to see results here") %>%
        datatable(rownames = F, options = list(dom = 't'))

      return(dt_empty)
    }

    spatial_receptors <- vals$queries_lit %>% 
      mutate(
        q = pmap(., function(Receptors, ...){
          keys <- c(Receptors) %>% 
            str_replace_all('"', '') %>%
            na_if("") %>% 
            na.omit()
          paste(keys, collapse = " AND ") })) %>% 
      pull(q) %>% 
      as.character()

    datasets <- tbl(con, "datasets") %>% 
      collect() %>%
      filter(ready) %>% 
      replace_na(list(buffer_km = 0)) %>% 
      select(-notes, -issues) %>% 
      separate_rows(tags, sep = ";") %>% 
      rename(tag = tags) %>% 
      mutate(
        tag = str_trim(tag)) %>% 
      filter(
        tag %in% spatial_receptors) %>% 
      arrange(tag, title) %>% 
      mutate(
        data      = map(
          code, 
          tabulate_dataset_shp_within_aoi, 
          aoi_wkt = aoi_wkt, output = "tibble"),
        data_nrow = map_int(data, nrow),
        Title     = map2_chr(
          title, src_url,
          function(x, y)
            glue("<a href={y} target='_blank'>{x}</a>")),
        Title     = ifelse(
          buffer_nm > 0,
          glue("{Title} [within {buffer_nm} nm of Location]"),
          Title)) %>% 
      select(
        Title,
        `Rows of Results` = data_nrow) %>% 
      arrange(Title)

    # TODO: nest spatial dataset results as sub-tables
    #   https://stackoverflow.com/questions/55058126/multiple-child-tables-in-dt-datatable#answer-56486534

    datatable(datasets, escape = F)

  })

Spatial section of downloaded Report

  1. report_template_gen.Rmd#L273-L293:

    if (params$spatial & !is.null(params$spatial_receptors)){
    datasets <- tbl(con, "datasets") %>% 
    collect() %>%
    filter(ready) %>% 
    replace_na(list(buffer_km = 0)) %>% 
    select(-notes, -issues) %>% 
    separate_rows(tags, sep = ";") %>% 
    rename(tag = tags) %>% 
    mutate(
      tag = str_trim(tag)) %>% 
    filter(
      tag %in% params$spatial_receptors) %>% 
    arrange(tag, title)
    
    lapply(1:nrow(datasets), function(i_datasets) {
    knit_expand('_spatial-tag-dataset.Rmd') }) %>% 
    knit_child(text = unlist(.), quiet = T) %>% 
    cat(sep = '\n\n')
    }
  2. _spatial-tag-dataset.Rmd#L4-L11:

    d <- datasets %>% slice({{i_datasets}})
    tabulate_dataset_shp_within_aoi(d$code, params$aoi_wkt)
  3. Then finally here's the function used to generate the report table per spatial dataset intersection with area of interest (aoi): tabulate_dataset_shp_within_aoi() in mhk-env_shiny-apps: functions.R, which pulls from SQL in spatial | marineenergy.app - Google Sheets:

    tabulate_dataset_shp_within_aoi <- function(dataset_code, aoi_wkt, output = "kable"){
    
    if (is.null(aoi_wkt))
    return("Please draw a Location to get a summary of the intersecting features for this dataset.")
    
    ds <- tbl(con, "datasets") %>% 
    filter(code == !!dataset_code) %>% 
    replace_na(list(buffer_nm = 0)) %>% 
    collect()
    
    if (length(aoi_wkt) > 1){
    aoi_wkts <- glue("'SRID=4326;{aoi_wkt}'::geometry")
    aoi_sql  <- glue("ST_COLLECT(\n{paste(aoi_wkts, collapse=',\n')})")
    } else {
    aoi_sql <- glue("'SRID=4326;{aoi_wkt}'")
    }
    
    dbSendQuery(con, glue("DROP TABLE IF EXISTS tmp_aoi CASCADE;"))
    if (!is.na(ds$summarize_sql)){
    sql_intersection <- glue("
      {ds$select_sql} AS ds
      WHERE ST_DWithin(Geography(ds.geometry), {aoi_sql}, {ds$buffer_nm} * 1852);")
    dbExecute(con, glue("CREATE TEMPORARY TABLE tmp_aoi AS {sql_intersection};"))
    x_df <- dbGetQuery(con, ds$summarize_sql)
    } else {
    x_sql <- glue("
      {ds$select_sql} AS ds
      WHERE ST_DWithin(Geography(ds.geometry), {aoi_sql}, {ds$buffer_nm} * 1852);")
    x_sf <- st_read(con, query = x_sql)
    x_df <- st_drop_geometry(x_sf)
    
    if (!is.na(ds$summarize_r))
      eval(parse(text=ds$summarize_r))
    }
    
    if (output == "tibble"){
    return(x_df)
    }
    
    x_spatial <- ifelse(
    ds$buffer_nm == 0,
    glue("\n\nSpatial: within site", .trim = F),
    glue("\n\nSpatial: within {ds$buffer_nm} nautical miles of site", .trim = F))
    
    if (knitr::is_html_output()){
    x_caption <- HTML(markdownToHTML(
      text = glue("Source: [{ds$src_name}]({ds$src_url}){x_spatial}"),
      fragment.only = T))
    
    tbl <- x_df %>% 
      kbl(caption = x_caption) %>%
      kable_styling(
        # full_width = F, position = "left", # position = "float_right"
        bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    
    } else {
    x_caption <- glue("Source: [{ds$src_name}]({ds$src_url}){x_spatial}")
    
    tbl <- x_df %>% 
      kable(caption = x_caption, format = "pipe")
    }
    
    tbl
    }
geocoug commented 3 years ago

Proposed update using Common Table Expressions (CTE's) instead of temporary tables for tabulate_dataset_shp_within_aoi() function to boost spatial query performance. Potential conflict with temporary tables when multiple users interacting. Temporary tables are omitted from PostgreSQL's VACUUM which can cause unnecessary "garbage". They can be cleaned, but must be done by manually calling vacuum <tablename>.

Additional resources

Update preview

Have not tested on shiny

TODO @bbest

tabulate_dataset_shp_within_aoi <- function(dataset_code, aoi_wkt, output = "kable"){

  if (is.null(aoi_wkt))
    return("Please draw a Location to get a summary of the intersecting features for this dataset.")

  ds <- tbl(con, "datasets") %>% 
    filter(code == !!dataset_code) %>% 
    replace_na(list(buffer_nm = 0)) %>% 
    collect()

  if (length(aoi_wkt) > 1){
    aoi_wkts <- glue("'SRID=4326;{aoi_wkt}'::geometry")
    aoi_sql  <- glue("ST_COLLECT(\n{paste(aoi_wkts, collapse=',\n')})")
  } else {
    aoi_sql <- glue("'SRID=4326;{aoi_wkt}'")
  }

  # Use CTE instead of temporary tables
  # TODO
  #    Add conditional to check if ds$summarize_r
  #    Drop geometry column in x_df?
  if (!is.na(ds$sumarize_sql)){
    x_df <- dbGetQuery(con,
              glue("with 
                    tmp_selarea as (
                      select ST_BUFFER({aoi_sql}, {ds$buffer_nm}) as geom
                    )
                    {ds$select_sql} as ds
                    inner join tmp_selarea on ST_INTERSECTS(ds.geometry, tmp_selarea.geom)
                    "))
  } else {
    x_df <- dbGetQuery(con,
              glue("with 
                    tmp_selarea as (
                      select ST_BUFFER({aoi_sql}, {ds$buffer_nm}) as geom
                    ),
                    tmp_aoi as (
                      {ds$select_sql} as ds
                      inner join tmp_selarea on ST_INTERSECTS(ds.geometry, tmp_selarea.geom)
                    )
                   {ds$summarize_sql}
                   "))
  }

  if (output == "tibble"){
    return(x_df)
  }

  x_spatial <- ifelse(
    ds$buffer_nm == 0,
    glue("\n\nSpatial: within site", .trim = F),
    glue("\n\nSpatial: within {ds$buffer_nm} nautical miles of site", .trim = F))

  if (knitr::is_html_output()){
    x_caption <- HTML(markdownToHTML(
      text = glue("Source: [{ds$src_name}]({ds$src_url}){x_spatial}"),
      fragment.only = T))

    tbl <- x_df %>% 
      kbl(caption = x_caption) %>%
      kable_styling(
        # full_width = F, position = "left", # position = "float_right"
        bootstrap_options = c("striped", "hover", "condensed", "responsive"))

  } else {
    x_caption <- glue("Source: [{ds$src_name}]({ds$src_url}){x_spatial}")

    tbl <- x_df %>% 
      kable(caption = x_caption, format = "pipe")
  }

  tbl
}
bbest commented 3 years ago

Implemented https://github.com/mhk-env/mhk-env_shiny-apps/commit/e131f317b49d984e06f18d75997977ee670672a2 woohoo 🥳

geocoug commented 3 years ago

Update select ST_BUFFER({aoi_sql}, {ds$buffer_nm}) as geom to select ST_BUFFER({aoi_sql}, {ds$buffer_nm} * 1852) as geom