Closed hadley closed 9 months ago
Now on CRAN: https://cran.r-project.org/web/packages/chromote/
library(chromote)
b <- ChromoteSession$new()
b$Page$navigate("https://www.r-project.org/")
b$view()
x <- b$DOM$getDocument()
p <- b$DOM$querySelector(x$root$nodeId, "p")
b$DOM$getOuterHTML(p$nodeId)
And need to implement interface similar to https://github.com/rstudio/shinytest/blob/main/R/shiny-driver.R so you can "click" and set values
Sample problems:
# https://twitter.com/ryanburge/status/1592910141128867841 --------------------
url <- "https://www.christianitytoday.com/ct/search/?query=abortion"
ct <- read_html_live(url)
items <- ct %>% html_elements(".ais-Hits-item")
items %>% html_element(".title-2") %>% html_text2()
items %>% html_element(".d-inline-block") %>% html_text2()
# https://twitter.com/minebocek/status/1592917026049183745 --------------------
url <- "https://www.forbes.com/top-colleges/"
sess <- read_html_live(url)
sess |>
html_elements(".TopColleges2023_tableRowGroup__65RHn") |>
html_elements(".TopColleges2023_tableRow__BYOSU")
# https://twitter.com/joshmccrain/status/1592910563327496199 -------------------
# http://joshuamccrain.com/tutorials/web_scraping_R_selenium.html
sess <- read_html_live("https://www.fcc.gov/media/engineering/dtvmaps")
sess$view()
sess$type("#startpoint", "77003")
sess$press("#startpoint", "Enter")
sess |>
html_table() |>
html_element("#contourdata > table:nth-child(3)") |>
select(-1) |>
filter(Callsign != "")
sess$press("#startpoint", "ArrowLeft", modifiers = c("Alt", "Shift"))
sess$press("#startpoint", "Delete")
# https://twitter.com/jdsdog10/status/1592912450625884160 ---------------------
url <- "https://www.fantasypros.com/nfl/rankings/half-point-ppr-rb.php"
sess <- read_html_live(url)
sess$view()
sess |> html_element("#ranking-table") |> html_table()
# https://twitter.com/sctyner/status/1592921110730797057 -----------------------
# need to click "load more" a few times
sess <- read_html_live("https://www.bodybuilding.com/exercises/finder")
sess |> html_elements(".ExResult-row") |> length()
sess$click(".ExLoadMore-btn")
sess |> html_elements(".ExResult-row") |> length()
sess$click(".ExLoadMore-btn")
sess |> html_elements(".ExResult-row") |> length()
To use xpath, need to do something like:
document_method <- function(session, method, ...) {
root_id <- sess$session$DOM$getDocument()$root$nodeId
obj_id <- session$DOM$resolveNode(root_id)$object$objectId
js_fun <- paste0("function() { return this", method, "}")
session$Runtime$callFunctionOn(js_fun, objectId = obj_id, ...)
}
document_method(sess$session, ".evaluate('/', document, null, XPathResult. ORDERED_NODE_ITERATOR_TYPE, null)")
That gives you an XPathResult object which you need to iterate over with something like:
let thisHeading = headings.iterateNext();
let alertText = "Level 2 headings in this document are:\n";
while (thisHeading) {
alertText += `${thisHeading.textContent}\n`;
thisHeading = headings.iterateNext();
}
But instead building up an array of nodes.
(function() {
let xpath = document.evaluate('//p', document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
let nodes = [];
let thisNode = xpath.iterateNext();
while (thisNode) {
nodes.push(thisNode);
thisNode = xpath.iterateNext();
}
return nodes;
})();
Tried and failed to simulate a mouse click:
click = function(css) {
node <- private$wait_for_selector(css)
loc <- private$get_bounding_rect(node)
# https://chromedevtools.github.io/devtools-protocol/1-3/Input/#method-dispatchMouseEvent
self$session$Input$dispatchMouseEvent(
type = "mousePressed",
x = (loc$left + loc$right) / 2,
y = (loc$top + loc$bottom) / 2,
button = "left",
clickCount = 1
)
self$session$Input$dispatchMouseEvent(
type = "mouseReleased",
x = (loc$left + loc$right) / 2,
y = (loc$top + loc$bottom) / 2,
button = "left",
clickCount = 1
)
invisible(self)
},
get_bounding_rect = function(node) {
out <- private$call_node_method(
node,
".getBoundingClientRect().toJSON()",
returnByValue = TRUE
)
out$result$value
},
Always timed out when setting the button
argument
Which implements https://chromedevtools.github.io/devtools-protocol/tot/Input