diff --git a/DESCRIPTION b/DESCRIPTION index 7da58af..1b00252 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,7 @@ Imports: jsonlite, processx (>= 3.3.0.9001), R6, + rlang, tools, utils Suggests: @@ -28,7 +29,9 @@ Suggests: debugme, desc, fs, + keyring, mockery, + openssl, pillar, pingr, rprojroot, diff --git a/R/async-http.R b/R/async-http.R index 38ddb6a..71c298b 100644 --- a/R/async-http.R +++ b/R/async-http.R @@ -371,6 +371,7 @@ download_files <- function(data, error_on_status = TRUE, row <- data[idx, ] dx <- download_if_newer( row$url, row$path, row$etag, + headers = row$headers[[1L]], on_progress = prog_cb, error_on_status = error_on_status, options = options, ... @@ -380,6 +381,7 @@ download_files <- function(data, error_on_status = TRUE, dx <- dx$catch(error = function(err) { download_if_newer( row$fallback_url, row$path, row$etag, + headers = row$headers[[1L]], error_on_status = error_on_status, options = options, ... ) diff --git a/R/auth.R b/R/auth.R new file mode 100644 index 0000000..9c4c32d --- /dev/null +++ b/R/auth.R @@ -0,0 +1,91 @@ +# Returns a set of HTTP headers for the given URL if (1) it belongs to a +# package repository; and (2) has credentials stored in the keyring. +repo_auth_headers <- function(url, allow_prompt = interactive()) { + if (!grepl("/(src|bin)/", url)) { + # Not a package or package index URL. + return(NULL) + } + if (!rlang::is_installed("keyring")) { + return(NULL) + } + creds <- extract_basic_auth_credentials(url) + if (!is.null(creds$password)) { + # The URL already contains a password. This is pretty poor practice, maybe + # we should issue a warning pointing users to the keyring package instead. + return(NULL) + } + if (is.null(creds$username)) { + # No username to key the lookup in the keyring with. + return(NULL) + } + + # In non-interactive contexts, force the use of the environment variable + # backend so that we never hang but can still support CI setups. + backend <- keyring::backend_env + if (allow_prompt) { + backend <- keyring::default_backend() + } + kb <- backend$new() + + # Use the repo URL without the username as the keyring "service". + svc <- extract_repo_url(url) + pwd <- NULL + tryCatch( + { + pwd <- kb$get(svc, creds$username) + }, + error = function(e) NULL + ) + + # Check whether we have one for the hostname as well. + svc <- extract_hostname(url) + tryCatch( + { + pwd <- kb$get(svc, creds$username) + }, + error = function(e) NULL + ) + + if (is.null(pwd)) { + return(NULL) + } + + auth <- paste(creds$username, pwd, sep = ":") + c("Authorization" = paste("Basic", openssl::base64_encode(auth))) +} + +extract_basic_auth_credentials <- function(url) { + pattern <- "^https?://(?:([^:@/]+)(?::([^@/]+))?@)?.*$" + if (!grepl(pattern, url, perl = TRUE)) { + cli::cli_abort("Unrecognized URL format: {.url {url}}.", .internal = TRUE) + } + username <- sub(pattern, "\\1", url, perl = TRUE) + if (!nchar(username)) { + username <- NULL + } + password <- sub(pattern, "\\2", url, perl = TRUE) + if (!nchar(password)) { + password <- NULL + } + list(username = username, password = password) +} + +extract_repo_url <- function(url) { + url <- sub( + "^(https?://)(?:[^:@/]+(?::[^@/]+)?@)?(.*)(/(src|bin)/)(.*)$", + "\\1\\2", + url, + perl = TRUE + ) + # Lop off any /__linux__/ subdirectories, too. + sub("^(.*)/__linux__/[^/]+(/.*)$", "\\1\\2", url, perl = TRUE) +} + +extract_hostname <- function(url) { + sub( + "^(https?://)(?:[^:@/]+(?::[^@/]+)?@)?([^/]+)(.*)", + "\\1\\2", + url, + perl = TRUE + ) +} diff --git a/R/metadata-cache.R b/R/metadata-cache.R index 0ffe672..a14f479 100644 --- a/R/metadata-cache.R +++ b/R/metadata-cache.R @@ -737,12 +737,17 @@ cmc__update_replica_pkgs <- function(self, private) { meta <- !is.na(pkgs$meta_url) bin <- !is.na(pkgs$bin_url) - dls <- data.frame( - stringsAsFactors = FALSE, + dls <- data_frame( url = c(pkgs$url, pkgs$meta_url[meta], pkgs$bin_url[bin], bsq_url), fallback_url = c(pkgs$fallback_url, rep(NA_character_, sum(meta) + sum(bin)), NA_character_), path = c(pkgs$path, pkgs$meta_path[meta], pkgs$bin_path[bin], bsq_path), etag = c(pkgs$etag, pkgs$meta_etag[meta], pkgs$bin_etag[bin], bsq_etag), + headers = c( + lapply(pkgs$url, repo_auth_headers), + vector("list", length = sum(meta)), + lapply(pkgs$bin_url[bin], repo_auth_headers), + vector("list", length = 1) + ), timeout = c(rep(c(200, 100), c(nrow(pkgs), sum(meta) + sum(bin))), 5), mayfail = TRUE ) diff --git a/R/package-cache.R b/R/package-cache.R index 8548ee2..fc2642e 100644 --- a/R/package-cache.R +++ b/R/package-cache.R @@ -181,7 +181,7 @@ package_cache <- R6Class( self; private; url; path; list(...); .list; on_progress; http_headers target <- tempfile() download_file(url, target, on_progress = on_progress, - headers = http_headers)$ + headers = c(http_headers, repo_auth_headers(url)))$ then(function(res) { headers <- curl::parse_headers(res$response$headers, multiple = TRUE) self$add(target, path, url = url, etag = res$etag, ..., @@ -209,7 +209,7 @@ package_cache <- R6Class( then(function(res) { if (! nrow(res)) { download_one_of(urls, target, on_progress = on_progress, - headers = http_headers)$ + headers = c(http_headers, repo_auth_headers(urls[1])))$ then(function(d) { headers <- curl::parse_headers(d$response$headers, multiple = TRUE) sha256 <- shasum256(target) @@ -249,7 +249,7 @@ package_cache <- R6Class( if (! nrow(res)) { ## Not in the cache, download and add it download_one_of(urls, target, on_progress = on_progress, - headers = http_headers)$ + headers = c(http_headers, repo_auth_headers(urls[1])))$ then(function(d) { headers <- curl::parse_headers(d$response$headers, multiple = TRUE) sha256 <- shasum256(target) @@ -263,7 +263,7 @@ package_cache <- R6Class( cat(res$etag, file = etag <- tempfile()) download_one_of(urls, target, etag_file = etag, on_progress = on_progress, - headers = http_headers)$ + headers = c(http_headers, repo_auth_headers(urls[1])))$ then(function(d) { if (d$response$status_code != 304) { ## No current, update it diff --git a/tests/testthat/test-auth.R b/tests/testthat/test-auth.R new file mode 100644 index 0000000..4f7402e --- /dev/null +++ b/tests/testthat/test-auth.R @@ -0,0 +1,85 @@ +test_that("looking up auth headers for repositories works as expected", { + skip_if_not_installed("keyring") + + # No package directory in the URL. + expect_null(repo_auth_headers("https://username@ppm.internal/healthz")) + + # The URL already contains a password. + expect_null( + repo_auth_headers( + "https://username:password@ppm.internal/cran/latest/src/contrib/PACKAGES.gz" + ) + ) + + # No username in the repo URL. + expect_null( + repo_auth_headers( + "https://ppm.internal/cran/latest/src/contrib/PACKAGES.gz" + ) + ) + + # Verify that the environment variable keyring backend is picked up correctly. + withr::with_envvar( + c("https://ppm.internal/cran/latest:username" = "token"), + expect_equal( + repo_auth_headers( + "https://username@ppm.internal/cran/__linux__/jammy/latest/src/contrib/PACKAGES.gz", + allow_prompt = FALSE + ), + c("Authorization" = "Basic dXNlcm5hbWU6dG9rZW4=") + ) + ) + + # Verify that we fall back to checking for a hostname credential when none + # is available for a specific repo. + withr::with_envvar( + c("https://ppm.internal:username" = "token"), + expect_equal( + repo_auth_headers( + "https://username@ppm.internal/cran/latest/bin/linux/4.4-jammy/contrib/4.4/PACKAGES.gz", + allow_prompt = FALSE + ), + c("Authorization" = "Basic dXNlcm5hbWU6dG9rZW4=") + ) + ) +}) + +test_that("basic auth credentials can be extracted from various URL formats", { + expect_equal( + extract_basic_auth_credentials("https://user.name:pass-word123@example.com"), + list(username = "user.name", password = "pass-word123") + ) + expect_equal( + extract_basic_auth_credentials("http://user@example.com"), + list(username = "user", password = NULL) + ) + expect_equal( + extract_basic_auth_credentials("https://example.com"), + list(username = NULL, password = NULL) + ) + expect_error( + extract_basic_auth_credentials("notaurl"), + "Unrecognized URL format" + ) +}) + +test_that("we can extract hostnames and repository URLs from package URLs", { + expect_equal( + extract_repo_url( + "https://username@ppm.internal/cran/__linux__/jammy/latest/src/contrib/PACKAGES.gz" + ), + "https://ppm.internal/cran/latest" + ) + expect_equal( + extract_repo_url( + "https://username@ppm.internal/cran/latest/bin/linux/4.4-jammy/contrib/pkg.tar.gz" + ), + "https://ppm.internal/cran/latest" + ) + expect_equal( + extract_hostname( + "https://username@ppm.internal/cran/latest/__linux__/jammy/src/contrib/PACKAGES.gz" + ), + "https://ppm.internal" + ) +})