The following concrete example illustrates the motivation for vapour through a timing benchmark for one standard operation: extracting feature geometries from a data set within a user-defined bounding box. The data set is the one used throughout the book Geocomputation with R by Robin Lovelace, Jakub Nowosad, and Jannes Muenchow, and can be obtained with the following code.

# Download the Natural Earth "parks and protected lands" archive, unpack it,
# and record the path of the polygon shapefile used in the benchmark.
# NOTE(review): the archive path below the base URL was missing from this
# document (the file.path call was left unclosed); the value used here follows
# the Natural Earth 10m cultural download layout -- confirm before relying on it.
url <- file.path ("http://www.naturalearthdata.com/http//www.naturalearthdata.com",
                  "download/10m/cultural/ne_10m_parks_and_protected_lands.zip")
download.file (url = url, destfile = "USA_parks.zip")
unzip (zipfile = "USA_parks.zip", exdir = "usa_parks")
fname <- "usa_parks/ne_10m_parks_and_protected_lands_area.shp"

That last fname is the file we're interested in, which contains polygons for all United States parks. We now construct a timing benchmark for three ways of extracting the data within the following pre-defined bounding box:

library (magrittr) # supplies the %>% pipe used by the code in this document
# Bounding box of interest, as a named numeric vector.
bb <- c (xmin = -120, ymin = 20, xmax = -100, ymax = 40)
# The same box converted to an sfc geometry with CRS 4326, for the sf-based
# extraction functions.
bb_sf <- sf::st_as_sfc (sf::st_bbox (bb, crs = sf::st_crs (4326)))

First, we define two functions to do the desired extraction using the sf package, comparing st_crop with the sf::[ sub-selection operator:

# Benchmark candidate: read the whole layer with sf, then crop it to the
# bounding box with sf::st_crop.
#
# fname: path of the vector file to read.
# Uses the top-level polygon `bb_sf`. The cropped result is the value of the
# function; messages and warnings raised while cropping are suppressed.
f_sf1 <- function (fname) {
    usa_parks <- sf::st_read (fname, quiet = TRUE)
    suppressMessages (suppressWarnings (
        parks2 <- sf::st_crop (usa_parks, bb_sf)
    ))
}
# Benchmark candidate: read the whole layer with sf, then sub-select the
# features with the sf `[` operator, using the bounding-box polygon as the
# row index.
#
# fname: path of the vector file to read.
# Uses the top-level polygon `bb_sf`. The sub-selected result is the value of
# the function; messages and warnings raised while sub-setting are suppressed.
f_sf2 <- function (fname) {
    usa_parks <- sf::st_read (fname, quiet = TRUE)
    suppressMessages (suppressWarnings (
        parks2 <- usa_parks [bb_sf, ]
    ))
}

Then we define three approaches using vapour, in each case extracting data equivalent to that from sf - that is, both geometries and attributes - yet simply leaving them separate here. The three approaches are:

  1. Read geometry as WKB, sub-select after reading, and convert to sfc lists;
  2. Read geometry as WKB pre-selected via an SQL statement, and convert to sfc lists; and
  3. Read geometry as JSON (text), pre-selected via SQL, and convert with the geojsonsf package.
library (vapour)
# Benchmark candidate: read all geometries and all attributes with vapour,
# then keep only the features selected by the bounding-box test.
#
# fname: path of the vector file to read.
# Uses the top-level bounding box `bb`, the vapour functions and the magrittr
# pipe. Geometries (`g`) and attributes (`a`) are left as separate objects;
# the attribute list is the (invisible) value of the function.
f_va1 <- function (fname) { # read all then sub-select
    ext <- do.call (rbind, vapour_read_extent (fname)) # bboxes of each feature
    # Features whose bbox lies strictly inside `bb`. The comparisons treat the
    # columns of `ext` as (xmin, xmax, ymin, ymax) -- TODO confirm against the
    # vapour documentation.
    indx <- which (ext [, 1] > bb [1] & ext [, 2] < bb [3] &
                   ext [, 3] > bb [2] & ext [, 4] < bb [4])
    g <- vapour_read_geometry (fname) [indx] %>%
        sf::st_as_sfc ()
    a <- lapply (vapour_read_attributes (fname), function (i) i [indx])
}
# Benchmark candidate: work out which features pass the bounding-box test,
# then read only those geometries and attributes through an SQL statement.
#
# fname: path of the vector file to read.
# Uses the top-level bounding box `bb`, the vapour functions and the magrittr
# pipe. Geometries (`g`) and attributes (`a`) are left as separate objects;
# the attributes are the (invisible) value of the function.
f_va2 <- function (fname) { # read selection only via SQL
    ext <- do.call (rbind, vapour_read_extent (fname))
    # Features whose bbox lies strictly inside `bb`. The comparisons treat the
    # columns of `ext` as (xmin, xmax, ymin, ymax) -- TODO confirm against the
    # vapour documentation.
    indx <- which (ext [, 1] > bb [1] & ext [, 2] < bb [3] &
                   ext [, 3] > bb [2] & ext [, 4] < bb [4])
    n <- paste0 (vapour_read_names (fname) [indx], collapse = ",") # GDAL FIDs
    # NOTE(review): when no feature matches, the IN () list is empty and the
    # statement is presumably invalid SQL -- verify if that case matters.
    stmt <- paste0 ("SELECT FID FROM ", vapour_layer_names (fname),
                    " WHERE FID in (", n, ")")
    g <- vapour_read_geometry (fname, sql = stmt) %>%
        sf::st_as_sfc ()
    a <- vapour_read_attributes (fname, sql = stmt)
}
# Benchmark candidate: as the SQL pre-selection approach, but the geometries
# are read as JSON text and converted with geojsonsf.
#
# fname: path of the vector file to read.
# Uses the top-level bounding box `bb`, the vapour functions and the magrittr
# pipe. Geometries (`g`) and attributes (`a`) are left as separate objects;
# the attributes are the (invisible) value of the function.
f_va3 <- function (fname) { # convert json text via geojsonsf
    ext <- do.call (rbind, vapour_read_extent (fname)) # bboxes of each feature
    # Features whose bbox lies strictly inside `bb`. The comparisons treat the
    # columns of `ext` as (xmin, xmax, ymin, ymax) -- TODO confirm against the
    # vapour documentation.
    indx <- which (ext [, 1] > bb [1] & ext [, 2] < bb [3] &
                   ext [, 3] > bb [2] & ext [, 4] < bb [4])
    n <- paste0 (vapour_read_names (fname) [indx], collapse = ",") # GDAL FIDs
    # NOTE(review): when no feature matches, the IN () list is empty and the
    # statement is presumably invalid SQL -- verify if that case matters.
    stmt <- paste0 ("SELECT FID FROM ", vapour_layer_names (fname),
                    " WHERE FID in (", n, ")")
    g <- vapour_read_geometry_text (fname, textformat = "json", sql = stmt)
    # Convert each JSON string separately, take the first geometry of each
    # result, and collect them into a single sfc.
    g <- lapply (g, function (i) geojsonsf::geojson_sfc (i) [[1]]) %>%
        sf::st_sfc ()
    a <- vapour_read_attributes (fname, sql = stmt)
}

The benchmark timings - in particular the "relative" values - then illustrate the advantages of vapour:

# Time ten replications of each of the five extraction functions.
rbenchmark::benchmark (
    f_sf1 (fname),
    f_sf2 (fname),
    f_va1 (fname),
    f_va2 (fname),
    f_va3 (fname),
    replications = 10
)
# Remove the downloaded archive and the extracted directory. Assigning the
# result of file.remove keeps it from being printed.
junk <- file.remove ("USA_parks.zip")
unlink ("usa_parks", recursive = TRUE)

Reading geometries only, as opposed to the sf reading of all geometries and attributes, affords a speed increase of about 25%, while utilizing the SQL capabilities of ogr_sql offers an increase of around 75%.

