From 0bbe021524e742f74371a5bc3309a48458d3cdc8 Mon Sep 17 00:00:00 2001 From: Aaron Jacobs Date: Wed, 15 Jan 2025 16:14:16 -0500 Subject: [PATCH] Add support for sourcing repository credentials from the keyring. This commit updates both the metadata and package caches to support downloading packages and package indexes from repositories that require HTTP basic authentication to access. Initial support for these authenticated repositories is very narrow: the repository URL must contain a username, no password, and have an entry in the system keyring. We also don't make any attempt to prompt users for credentials when requests fail. Unit tests are included for the new authentication header helpers, but there are currently no tests of end-to-end workflows with an authenticated repository, and I may have missed something. Part of r-lib/pak#729. Signed-off-by: Aaron Jacobs --- DESCRIPTION | 3 ++ R/async-http.R | 2 + R/auth.R | 91 ++++++++++++++++++++++++++++++++++++++ R/metadata-cache.R | 9 +++- R/package-cache.R | 8 ++-- tests/testthat/test-auth.R | 85 +++++++++++++++++++++++++++++++++++ 6 files changed, 192 insertions(+), 6 deletions(-) create mode 100644 R/auth.R create mode 100644 tests/testthat/test-auth.R diff --git a/DESCRIPTION b/DESCRIPTION index 7da58af4..1b002524 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,7 @@ Imports: jsonlite, processx (>= 3.3.0.9001), R6, + rlang, tools, utils Suggests: @@ -28,7 +29,9 @@ Suggests: debugme, desc, fs, + keyring, mockery, + openssl, pillar, pingr, rprojroot, diff --git a/R/async-http.R b/R/async-http.R index 38ddb6ae..71c298b5 100644 --- a/R/async-http.R +++ b/R/async-http.R @@ -371,6 +371,7 @@ download_files <- function(data, error_on_status = TRUE, row <- data[idx, ] dx <- download_if_newer( row$url, row$path, row$etag, + headers = row$headers[[1L]], on_progress = prog_cb, error_on_status = error_on_status, options = options, ... @@ -380,6 +381,7 @@ download_files <- function(data, error_on_status = TRUE, dx <- dx$catch(error = function(err) { download_if_newer( row$fallback_url, row$path, row$etag, + headers = row$headers[[1L]], error_on_status = error_on_status, options = options, ... ) diff --git a/R/auth.R b/R/auth.R new file mode 100644 index 00000000..9c4c32d8 --- /dev/null +++ b/R/auth.R @@ -0,0 +1,91 @@ +# Returns a set of HTTP headers for the given URL if (1) it belongs to a +# package repository; and (2) has credentials stored in the keyring. +repo_auth_headers <- function(url, allow_prompt = interactive()) { + if (!grepl("/(src|bin)/", url)) { + # Not a package or package index URL. + return(NULL) + } + if (!rlang::is_installed("keyring")) { + return(NULL) + } + creds <- extract_basic_auth_credentials(url) + if (!is.null(creds$password)) { + # The URL already contains a password. This is pretty poor practice, maybe + # we should issue a warning pointing users to the keyring package instead. + return(NULL) + } + if (is.null(creds$username)) { + # No username to key the lookup in the keyring with. + return(NULL) + } + + # In non-interactive contexts, force the use of the environment variable + # backend so that we never hang but can still support CI setups. + backend <- keyring::backend_env + if (allow_prompt) { + backend <- keyring::default_backend() + } + kb <- backend$new() + + # Use the repo URL without the username as the keyring "service". + svc <- extract_repo_url(url) + pwd <- NULL + tryCatch( + { + pwd <- kb$get(svc, creds$username) + }, + error = function(e) NULL + ) + + # Check whether we have one for the hostname as well. + svc <- extract_hostname(url) + tryCatch( + { + pwd <- kb$get(svc, creds$username) + }, + error = function(e) NULL + ) + + if (is.null(pwd)) { + return(NULL) + } + + auth <- paste(creds$username, pwd, sep = ":") + c("Authorization" = paste("Basic", openssl::base64_encode(auth))) +} + +extract_basic_auth_credentials <- function(url) { + pattern <- "^https?://(?:([^:@/]+)(?::([^@/]+))?@)?.*$" + if (!grepl(pattern, url, perl = TRUE)) { + cli::cli_abort("Unrecognized URL format: {.url {url}}.", .internal = TRUE) + } + username <- sub(pattern, "\\1", url, perl = TRUE) + if (!nchar(username)) { + username <- NULL + } + password <- sub(pattern, "\\2", url, perl = TRUE) + if (!nchar(password)) { + password <- NULL + } + list(username = username, password = password) +} + +extract_repo_url <- function(url) { + url <- sub( + "^(https?://)(?:[^:@/]+(?::[^@/]+)?@)?(.*)(/(src|bin)/)(.*)$", + "\\1\\2", + url, + perl = TRUE + ) + # Lop off any /__linux__/ subdirectories, too. + sub("^(.*)/__linux__/[^/]+(/.*)$", "\\1\\2", url, perl = TRUE) +} + +extract_hostname <- function(url) { + sub( + "^(https?://)(?:[^:@/]+(?::[^@/]+)?@)?([^/]+)(.*)", + "\\1\\2", + url, + perl = TRUE + ) +} diff --git a/R/metadata-cache.R b/R/metadata-cache.R index 0ffe672f..a14f479d 100644 --- a/R/metadata-cache.R +++ b/R/metadata-cache.R @@ -737,12 +737,17 @@ cmc__update_replica_pkgs <- function(self, private) { meta <- !is.na(pkgs$meta_url) bin <- !is.na(pkgs$bin_url) - dls <- data.frame( - stringsAsFactors = FALSE, + dls <- data_frame( url = c(pkgs$url, pkgs$meta_url[meta], pkgs$bin_url[bin], bsq_url), fallback_url = c(pkgs$fallback_url, rep(NA_character_, sum(meta) + sum(bin)), NA_character_), path = c(pkgs$path, pkgs$meta_path[meta], pkgs$bin_path[bin], bsq_path), etag = c(pkgs$etag, pkgs$meta_etag[meta], pkgs$bin_etag[bin], bsq_etag), + headers = c( + lapply(pkgs$url, repo_auth_headers), + vector("list", length = sum(meta)), + lapply(pkgs$bin_url[bin], repo_auth_headers), + vector("list", length = 1) + ), timeout = c(rep(c(200, 100), c(nrow(pkgs), sum(meta) + sum(bin))), 5), mayfail = TRUE ) diff --git a/R/package-cache.R b/R/package-cache.R index 8548ee2d..fc2642eb 100644 --- a/R/package-cache.R +++ b/R/package-cache.R @@ -181,7 +181,7 @@ package_cache <- R6Class( self; private; url; path; list(...); .list; on_progress; http_headers target <- tempfile() download_file(url, target, on_progress = on_progress, - headers = http_headers)$ + headers = c(http_headers, repo_auth_headers(url)))$ then(function(res) { headers <- curl::parse_headers(res$response$headers, multiple = TRUE) self$add(target, path, url = url, etag = res$etag, ..., @@ -209,7 +209,7 @@ package_cache <- R6Class( then(function(res) { if (! nrow(res)) { download_one_of(urls, target, on_progress = on_progress, - headers = http_headers)$ + headers = c(http_headers, repo_auth_headers(urls[1])))$ then(function(d) { headers <- curl::parse_headers(d$response$headers, multiple = TRUE) sha256 <- shasum256(target) @@ -249,7 +249,7 @@ package_cache <- R6Class( if (! nrow(res)) { ## Not in the cache, download and add it download_one_of(urls, target, on_progress = on_progress, - headers = http_headers)$ + headers = c(http_headers, repo_auth_headers(urls[1])))$ then(function(d) { headers <- curl::parse_headers(d$response$headers, multiple = TRUE) sha256 <- shasum256(target) @@ -263,7 +263,7 @@ package_cache <- R6Class( cat(res$etag, file = etag <- tempfile()) download_one_of(urls, target, etag_file = etag, on_progress = on_progress, - headers = http_headers)$ + headers = c(http_headers, repo_auth_headers(urls[1])))$ then(function(d) { if (d$response$status_code != 304) { ## No current, update it diff --git a/tests/testthat/test-auth.R b/tests/testthat/test-auth.R new file mode 100644 index 00000000..4f7402ee --- /dev/null +++ b/tests/testthat/test-auth.R @@ -0,0 +1,85 @@ +test_that("looking up auth headers for repositories works as expected", { + skip_if_not_installed("keyring") + + # No package directory in the URL. + expect_null(repo_auth_headers("https://username@ppm.internal/healthz")) + + # The URL already contains a password. + expect_null( + repo_auth_headers( + "https://username:password@ppm.internal/cran/latest/src/contrib/PACKAGES.gz" + ) + ) + + # No username in the repo URL. + expect_null( + repo_auth_headers( + "https://ppm.internal/cran/latest/src/contrib/PACKAGES.gz" + ) + ) + + # Verify that the environment variable keyring backend is picked up correctly. + withr::with_envvar( + c("https://ppm.internal/cran/latest:username" = "token"), + expect_equal( + repo_auth_headers( + "https://username@ppm.internal/cran/__linux__/jammy/latest/src/contrib/PACKAGES.gz", + allow_prompt = FALSE + ), + c("Authorization" = "Basic dXNlcm5hbWU6dG9rZW4=") + ) + ) + + # Verify that we fall back to checking for a hostname credential when none + # is available for a specific repo. + withr::with_envvar( + c("https://ppm.internal:username" = "token"), + expect_equal( + repo_auth_headers( + "https://username@ppm.internal/cran/latest/bin/linux/4.4-jammy/contrib/4.4/PACKAGES.gz", + allow_prompt = FALSE + ), + c("Authorization" = "Basic dXNlcm5hbWU6dG9rZW4=") + ) + ) +}) + +test_that("basic auth credentials can be extracted from various URL formats", { + expect_equal( + extract_basic_auth_credentials("https://user.name:pass-word123@example.com"), + list(username = "user.name", password = "pass-word123") + ) + expect_equal( + extract_basic_auth_credentials("http://user@example.com"), + list(username = "user", password = NULL) + ) + expect_equal( + extract_basic_auth_credentials("https://example.com"), + list(username = NULL, password = NULL) + ) + expect_error( + extract_basic_auth_credentials("notaurl"), + "Unrecognized URL format" + ) +}) + +test_that("we can extract hostnames and repository URLs from package URLs", { + expect_equal( + extract_repo_url( + "https://username@ppm.internal/cran/__linux__/jammy/latest/src/contrib/PACKAGES.gz" + ), + "https://ppm.internal/cran/latest" + ) + expect_equal( + extract_repo_url( + "https://username@ppm.internal/cran/latest/bin/linux/4.4-jammy/contrib/pkg.tar.gz" + ), + "https://ppm.internal/cran/latest" + ) + expect_equal( + extract_hostname( + "https://username@ppm.internal/cran/latest/__linux__/jammy/src/contrib/PACKAGES.gz" + ), + "https://ppm.internal" + ) +})