PolMine / dbpedia

R Wrapper for Corpus Annotation with DBpedia Spotlight
3 stars 0 forks source link

Replace SPARQL::SPARQL() #21

Closed ablaette closed 1 year ago

ablaette commented 1 year ago

We rely on the SPARQL::SPARQL() function. The SPARQL package cannot be installed from CRAN and we need to load RCurl and XML seperately. Looking at the code of the function, it should not be too complicated to develop a replacement,

function (url = "http://localhost/", query = "", update = "", 
    ns = NULL, param = "", extra = NULL, format = "xml", curl_args = NULL, 
    parser_args = NULL) 
{
    if (!is.null(extra)) {
        extrastr <- paste("&", sapply(seq(1, length(extra)), 
            function(i) {
                paste(names(extra)[i], "=", URLencode(extra[[i]]), 
                  sep = "")
            }), collapse = "&", sep = "")
    }
    else {
        extrastr <- ""
    }
    tf <- tempfile()
    if (query != "") {
        if (param == "") {
            param <- "query"
        }
        if (format == "xml") {
            tf <- do.call(getURL, append(list(url = paste(url, 
                "?", param, "=", gsub("\\+", "%2B", URLencode(query, 
                  reserved = TRUE)), extrastr, sep = ""), httpheader = c(Accept = "application/sparql-results+xml")), 
                curl_args))
            DOM <- do.call(xmlParse, append(list(tf), parser_args))
            if (length(getNodeSet(DOM, "//s:result[1]", namespaces = sparqlns)) == 
                0) {
                rm(DOM)
                df <- data.frame(c())
            }
            else {
                attrs <- unlist(xpathApply(DOM, paste("//s:head/s:variable", 
                  sep = ""), namespaces = sparqlns, quote(xmlGetAttr(x, 
                  "name"))))
                ns2 <- noBrackets(ns)
                res <- get_attr(attrs, DOM, ns2)
                df <- data.frame(res)
                rm(res)
                rm(DOM)
                n = names(df)
                for (r in 1:length(n)) {
                  name <- n[r]
                  df[name] <- as.vector(unlist(df[name]))
                }
            }
        }
        else if (format == "csv") {
            tf <- do.call(getURL, append(list(url = paste(url, 
                "?", param, "=", gsub("\\+", "%2B", URLencode(query, 
                  reserved = TRUE)), extrastr, sep = "")), curl_args))
            df <- do.call(readCSVstring, append(list(tf, blank.lines.skip = TRUE, 
                strip.white = TRUE), parser_args))
            if (!is.null(ns)) 
                df <- dropNS(df, ns)
        }
        else if (format == "tsv") {
            tf <- do.call(getURL, append(list(url = paste(url, 
                "?", param, "=", gsub("\\+", "%2B", URLencode(query, 
                  reserved = TRUE)), extrastr, sep = "")), curl_args))
            df <- do.call(readTSVstring, append(list(tf, blank.lines.skip = TRUE, 
                strip.white = TRUE), parser_args))
            if (!is.null(ns)) 
                df <- dropNS(df, ns)
        }
        else {
            cat("unknown format \"", format, "\"\n\n", sep = "")
            return(list(results = NULL, namespaces = ns))
        }
        list(results = df, namespaces = ns)
    }
    else if (update != "") {
        if (param == "") {
            param <- "update"
        }
        extra[[param]] <- update
        do.call(postForm, append(list(url, .params = extra), 
            curl_args))
    }
}
ablaette commented 1 year ago

File 'utils.R' now includes a simplified version of SPARQL::SPARLQ() that replaces packages XML and RCurl with xml2 and httr.

sparql_query <- function(endpoint, query){

  stopifnot(
    is.character(endpoint), length(endpoint) == 1L,
    is.character(query), length(query) == 1L
  )

  url <- paste(
    endpoint,
    "?query=",
    gsub("\\+", "%2B", URLencode(query, reserved = TRUE)),
    sep = ""
  )

  results <- GET(
    url = url,
    add_headers(Accept = "application/sparql-results+xml")
  )

  content <- content(results, as = "text")

  dom <- read_xml(x = content)
  results <- xml_find_all(x = dom, xpath = "//d1:result")

  if (length(results) == 0L) return(data.frame(c()))

  vars <- xml_find_all(dom, xpath = "//d1:head/d1:variable")
  attrs <- xml_attr(vars, attr = "name")

  data <- lapply(
    attrs,
    function(attr){
      nodes <- xml_find_all(
        results,
        xpath = sprintf("//d1:binding[@name='%s']", attr)
      )
      if (length(nodes) == 0L){
        rep(NA, times = length(results))
      } else {
        xml_text(nodes)
      }
    }
  )
  names(data) <- attrs
  data.frame(data)
}