r-lib / httr2

Make HTTP requests and process their responses. A modern reimagining of httr.
https://httr2.r-lib.org
Other
237 stars 59 forks source link

AWS signing #566

Closed hadley closed 1 month ago

hadley commented 1 month ago

If curl >= 7.75.0 (the release that added `CURLOPT_AWS_SIGV4`), use https://curl.se/libcurl/c/CURLOPT_AWS_SIGV4.html. Otherwise, fall back to something based on this code from @kalimu:

#' Sign an httr2 request with AWS Signature Version 4
#'
#' Builds the SigV4 canonical request, string to sign and signature for
#' `req`, and returns `req` with the signing headers (`host`, `x-amz-date`,
#' `x-amz-content-sha256`, optionally `x-amz-security-token`) and the
#' resulting `Authorization` header added. An `Accept: application/json`
#' header is also set, as in the original implementation.
#'
#' @param req An httr2 request object.
#' @param aws_service Service name used in the credential scope.
#' @param aws_access_key_id,aws_secret_access_key AWS credentials. Default to
#'   the `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` environment variables.
#'   Must be non-empty strings.
#' @param aws_session_token Optional session token (defaults to the
#'   `AWS_SESSION_TOKEN` environment variable). When empty, no
#'   `x-amz-security-token` header is added or signed.
#' @param aws_region Region used in the credential scope. Defaults to the
#'   `AWS_DEFAULT_REGION` environment variable.
#' @param current_time Time used for `x-amz-date` and the credential scope;
#'   always rendered in UTC.
#' @param debug If `TRUE`, print the canonical request, string to sign,
#'   signature and authorization header.
#' @return `req`, with the signing headers and `Authorization` header set.
req_sign_aws_v4_auth <- function(
    req,
    aws_service,
    aws_access_key_id     = Sys.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key = Sys.getenv("AWS_SECRET_ACCESS_KEY"),
    aws_session_token     = Sys.getenv("AWS_SESSION_TOKEN"),
    aws_region            = Sys.getenv("AWS_DEFAULT_REGION"),
    current_time          = Sys.time(),
    debug                 = FALSE
    ) {

  # TRUE only for a single, non-missing, non-empty string.
  has_value <- function(x) {
    is.character(x) && length(x) == 1L && !is.na(x) && nzchar(x)
  }

  # Same debug output as before: a printed label followed by the value.
  show_debug <- function(label, value) {
    if (isTRUE(debug)) {
      print(label)
      cat(value, "\n")
    }
  }

  hmac_sha256 <- function(key, object, raw = TRUE) {
    digest::hmac(key = key, object = object, algo = "sha256", raw = raw)
  }

  # Signing with empty credentials can only produce a request AWS rejects,
  # so fail early with a clear message instead.
  if (!has_value(aws_access_key_id) || !has_value(aws_secret_access_key)) {
    stop(
      "`aws_access_key_id` and `aws_secret_access_key` must be non-empty ",
      "strings.",
      call. = FALSE
    )
  }

  url      <- httr2::url_parse(req$url)
  req_time <- format(current_time, "%Y%m%dT%H%M%SZ", tz = "UTC")
  # The credential-scope date must be the UTC date of `x-amz-date`; deriving
  # it from `req_time` avoids a local-time-zone mismatch around midnight.
  date     <- substring(req_time, 1, 8)

  # ---- Payload hash ----
  if (is.null(req$body)) {
    body <- ""
  } else if (isTRUE(req$body$type %in% c("raw", "string"))) {
    # Raw payloads are hashed as-is; re-serialising them as JSON would change
    # the bytes that get signed.
    # NOTE(review): "string" is assumed to be how newer httr2 versions tag
    # character bodies passed to req_body_raw() -- confirm.
    body <- req$body$data
  } else {
    body <- jsonlite::toJSON(req$body$data, auto_unbox = TRUE)
  }

  content_sha256 <- digest::digest(body, algo = "sha256", serialize = FALSE)

  # ---- Method and signing headers ----
  if (is.null(req$method)) {
    # Mirror httr2's own default (GET without a body, POST with one) so the
    # signed method matches the method that is actually sent.
    default_method <- if (is.null(req$body)) "GET" else "POST"
    req <- httr2::req_method(req, default_method)
  }

  req <- httr2::req_headers(req, "Accept" = "application/json")
  req <- httr2::req_headers(
    req,
    "host"                 = url$hostname,
    "x-amz-date"           = req_time,
    "x-amz-content-sha256" = content_sha256
  )

  # Only sign the security token when temporary credentials are in use; an
  # empty header would be signed but carries no value on the wire.
  if (has_value(aws_session_token)) {
    req <- httr2::req_headers(req, "x-amz-security-token" = aws_session_token)
  }

  # ---- Canonical query string ----
  query_args <- url$query

  if (length(query_args) > 0) {
    # method = "radix" sorts in the C locale (code-point order), independent
    # of the session's collation settings.
    query_args <- unlist(
      query_args[order(names(query_args), method = "radix")]
    )

    encoded_names <- vapply(
      names(query_args),
      utils::URLencode,
      character(1),
      reserved  = TRUE,
      USE.NAMES = FALSE
    )
    encoded_values <- vapply(
      as.character(query_args),
      utils::URLencode,
      character(1),
      reserved  = TRUE,
      USE.NAMES = FALSE
    )

    query_string <- paste0(encoded_names, "=", encoded_values, collapse = "&")
  } else {
    query_string <- ""
  }

  # ---- Canonical headers ----
  canonical_headers        <- req$headers
  names(canonical_headers) <- tolower(names(canonical_headers))

  canonical_headers <- canonical_headers[
    order(names(canonical_headers), method = "radix")
  ]
  # Trim surrounding whitespace and collapse internal runs to a single space.
  trimmed_headers <- gsub("[[:space:]]{2,}", " ", trimws(canonical_headers))

  header_string  <- paste0(names(canonical_headers), ":", trimmed_headers, "\n",
                           collapse = "")
  signed_headers <- paste(names(canonical_headers), collapse = ";")

  # ---- Canonical request ----
  path <- url$path
  if (is.null(path) || !nzchar(path)) {
    path <- "/"
  }
  path <- gsub(":", "%3A", path, fixed = TRUE)

  canonical_request <- paste(req$method,
                             path,
                             query_string,
                             header_string,
                             signed_headers,
                             content_sha256,
                             sep = "\n")

  show_debug("Canonical request:", canonical_request)

  canonical_request_hash <- digest::digest(canonical_request,
                                           algo      = "sha256",
                                           serialize = FALSE)

  # ---- String to sign ----
  algorithm <- "AWS4-HMAC-SHA256"
  scope     <- paste(date, aws_region, aws_service, "aws4_request", sep = "/")

  string_to_sign <- paste(algorithm,
                          req_time,
                          scope,
                          canonical_request_hash,
                          sep = "\n")

  show_debug("String to sign:", string_to_sign)

  # ---- Signing key and signature ----
  # Key derivation chain: secret -> date -> region -> service -> "aws4_request".
  signing_key <- paste0("AWS4", aws_secret_access_key)
  for (part in c(date, aws_region, aws_service, "aws4_request")) {
    signing_key <- hmac_sha256(signing_key, part)
  }

  # Final HMAC is returned hex-encoded (raw = FALSE) for use in the header.
  signature <- hmac_sha256(signing_key, string_to_sign, raw = FALSE)

  show_debug("Signature:", signature)

  # ---- Authorization header ----
  credential <- paste(aws_access_key_id, scope, sep = "/")

  authorization_header <-
    paste(algorithm,
          paste(
            paste0("Credential=",    credential),
            paste0("SignedHeaders=", signed_headers),
            paste0("Signature=",     signature),
            sep = ","))

  show_debug("Authorization header:", authorization_header)

  httr2::req_headers(req, Authorization = authorization_header)
}
kalimu commented 1 month ago

One correction to the code (the body could also be of raw type):

# Choose the payload that gets hashed for the signature:
#   - no body   -> empty string
#   - raw body  -> the data exactly as supplied
#   - otherwise -> the data serialised as JSON
if (is.null(req$body)) {
  body <- ""
} else if (identical(req$body$type, "raw")) {
  # `req` (not `request`) is the object in scope; identical() also copes with
  # a missing `type` field, where `==` would yield a zero-length condition.
  body <- req$body$data
} else {
  body <- jsonlite::toJSON(req$body$data, auto_unbox = TRUE)
}
hadley commented 1 month ago

@kalimu I ended up almost completely rewriting your code in #569 (just so I fully understand it and I'm equipped to maintain it in the future), but I can't tell you how much it helped to have a working implementation to start from. I've had to write code like this in the past and it's so frustrating because you have to have all the pieces perfectly aligned for it to work. Having something that already works that I can refactor saved me hours of frustration. Thank you!