sckott / webmiddens

cache http requests
https://sckott.github.io/webmiddens
Other
10 stars 1 forks source link
api caching fakeweb http http-cache http-mocking r rstats web

webmiddens

Project Status: Active – The project has reached a stable, usable state and is being actively developed. R-CMD-check codecov

simple caching of HTTP requests/responses, hooking into webmockr (https://github.com/ropensci/webmockr) for the HTTP request matching

A midden is a debris pile constructed by a woodrat/pack rat (https://en.wikipedia.org/wiki/Pack_rat#Midden)

the need

brainstorming

http libraries

right now we only support crul, but httr support should arrive soon

installation

remotes::install_github("sckott/webmiddens")

use_midden()

library(webmiddens)
library(crul)

Let's say you have some function http_request() that makes an HTTP request, and that you re-use it in various parts of your project or package.

# Send a GET request to the "get" endpoint of https://httpbin.org.
# Anything supplied through `...` is collected into a list and handed to the
# crul client as its `opts`. The result of the GET call is returned.
http_request <- function(...) {
  client <- crul::HttpClient$new(
    url = "https://httpbin.org",
    opts = list(...)
  )
  client$get(path = "get")
}

And you have a function some_fxn() that uses http_request() to do the HTTP request, then processes the results into a data.frame or list, etc. This is a super common pattern in a project or R package that deals with web resources.

# Call http_request() (forwarding `...` unchanged), read the response body as
# UTF-8 text, and parse that text as JSON.
some_fxn <- function(...) {
  response <- http_request(...)
  body_text <- response$parse("UTF-8")
  jsonlite::fromJSON(body_text)
}

Without webmiddens the HTTP request happens as usual and all is good

some_fxn()
#> $args
#> named list()
#> 
#> $headers
#> $headers$Accept
#> [1] "application/json, text/xml, application/xml, */*"
#> 
#> $headers$`Accept-Encoding`
#> [1] "gzip, deflate"
#> 
#> $headers$Host
#> [1] "httpbin.org"
#> 
#> $headers$`User-Agent`
#> [1] "libcurl/7.74.0 r-curl/4.3 crul/1.0.2.92"
#> 
#> $headers$`X-Amzn-Trace-Id`
#> [1] "Root=1-5fd29de8-0e978093689e02246d0b3d92"
#> 
#> 
#> $origin
#> [1] "24.21.229.59"
#> 
#> $url
#> [1] "https://httpbin.org/get"

Now, with webmiddens

run wm_configuration() first to set the path where HTTP requests will be cached

wm_configuration("foo1")
#> configuring midden from $path

first request is a real HTTP request

res1 <- use_midden(some_fxn())
res1
#> $args
#> named list()
#> 
#> $headers
#> $headers$Accept
#> [1] "application/json, text/xml, application/xml, */*"
#> 
#> $headers$`Accept-Encoding`
#> [1] "gzip, deflate"
#> 
#> $headers$Host
#> [1] "httpbin.org"
#> 
#> $headers$`User-Agent`
#> [1] "libcurl/7.74.0 r-curl/4.3 crul/1.0.2.92"
#> 
#> $headers$`X-Amzn-Trace-Id`
#> [1] "Root=1-5fd29de8-3ad69a2f59e45afc48446e85"
#> 
#> 
#> $origin
#> [1] "24.21.229.59"
#> 
#> $url
#> [1] "https://httpbin.org/get"

second request uses the cached response from the first request

res2 <- use_midden(some_fxn())
res2
#> $args
#> named list()
#> 
#> $headers
#> $headers$Accept
#> [1] "application/json, text/xml, application/xml, */*"
#> 
#> $headers$`Accept-Encoding`
#> [1] "gzip, deflate"
#> 
#> $headers$Host
#> [1] "httpbin.org"
#> 
#> $headers$`User-Agent`
#> [1] "libcurl/7.74.0 r-curl/4.3 crul/1.0.2.92"
#> 
#> $headers$`X-Amzn-Trace-Id`
#> [1] "Root=1-5fd29de8-65506d0055d8b5c874949851"
#> 
#> 
#> $origin
#> [1] "24.21.229.59"
#> 
#> $url
#> [1] "https://httpbin.org/get"

the midden class

# Create a midden object; as the printout shows, no path is set yet
x <- midden$new()
x # no path
#> <midden> 
#>   path: 
#>   expiry (sec): not set
# Run $init() to set the path
x$init(path = "forest")
x
#> <midden> 
#>   path: /Users/sckott/Library/Caches/R/forest
#>   expiry (sec): not set

The cache slot has a hoardr object which you can use to fiddle with files, see ?hoardr::hoard

x$cache
#> <hoard> 
#>   path: forest
#>   cache path: /Users/sckott/Library/Caches/R/forest

Use expire() to set the expiry time (in seconds). You can set it either by passing a value to expire() or through the environment variable WEBMIDDENS_EXPIRY_SEC

# Called with no arguments, expire() returns the current expiry (NULL if unset)
x$expire()
#> NULL
# Passing a number sets the expiry, in seconds
x$expire(5)
#> [1] 5
x$expire()
#> [1] 5
# reset = TRUE clears the expiry again
x$expire(reset = TRUE)
#> NULL
x$expire()
#> NULL
# The expiry can also be supplied through the environment variable
Sys.setenv(WEBMIDDENS_EXPIRY_SEC = 35)
x$expire()
#> [1] 35
x$expire(reset = TRUE)
#> NULL
x$expire()
#> NULL

FIXME: The example below is not working right now - figure out why

# NOTE(review): wm_enable() presumably switches on request interception — confirm
wm_enable()
con <- crul::HttpClient$new("https://httpbin.org")
# `x` is the midden object created and initialised in the earlier example
# first request is a real HTTP request
x$r(con$get("get", query = list(stuff = "bananas")))
# following requests use the cached response
x$r(con$get("get", query = list(stuff = "bananas")))

verbose output

# verbose = TRUE is passed when the midden object is constructed
x <- midden$new(verbose = TRUE)
x$init(path = "rainforest")
# `con` is the crul client created in the earlier wm_enable() example
x$r(con$get("get", query = list(stuff = "bananas")))

set expiration time

x <- midden$new()
x$init(path = "grass")
# set the expiry to 3 seconds
x$expire(3)
# print the object to inspect its path and expiry
x

Delete all the files in your "midden" (the folder with cached files)

x$cleanup()

Delete the "midden" (the folder with cached files)

x$destroy()

Meta