Skip to content

Commit

Permalink
Add support for sourcing repository credentials from the keyring.
Browse files Browse the repository at this point in the history
This commit updates both the metadata and package caches to support
downloading packages and package indexes from repositories that require
HTTP basic authentication to access.

Initial support for these authenticated repositories is very narrow: the
repository URL must contain a username, no password, and have an entry
in the system keyring. We also don't make any attempt to prompt users
for credentials when requests fail.

Unit tests are included for the new authentication header helpers, but
there are currently no tests of end-to-end workflows with an
authenticated repository, and I may have missed something.

Part of r-lib/pak#729.

Signed-off-by: Aaron Jacobs <[email protected]>
  • Loading branch information
atheriel committed Jan 16, 2025
1 parent dc70ac1 commit 9b23bdd
Show file tree
Hide file tree
Showing 6 changed files with 193 additions and 7 deletions.
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,17 @@ Imports:
jsonlite,
processx (>= 3.3.0.9001),
R6,
rlang,
tools,
utils
Suggests:
covr,
debugme,
desc,
fs,
keyring,
mockery,
openssl,
pillar,
pingr,
rprojroot,
Expand Down
4 changes: 3 additions & 1 deletion R/async-http.R
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ download_one_of <- function(urls, destfile, etag_file = NULL,
}

download_files <- function(data, error_on_status = TRUE,
options = list(), ...) {
options = list(), headers = NULL, ...) {

if (any(dup <- duplicated(data$path))) {
stop("Duplicate target paths in download_files: ",
Expand All @@ -371,6 +371,7 @@ download_files <- function(data, error_on_status = TRUE,
row <- data[idx, ]
dx <- download_if_newer(
row$url, row$path, row$etag,
headers = c(headers, row$headers[[1L]]),
on_progress = prog_cb,
error_on_status = error_on_status,
options = options, ...
Expand All @@ -380,6 +381,7 @@ download_files <- function(data, error_on_status = TRUE,
dx <- dx$catch(error = function(err) {
download_if_newer(
row$fallback_url, row$path, row$etag,
headers = c(headers, row$headers[[1L]]),
error_on_status = error_on_status,
options = options, ...
)
Expand Down
91 changes: 91 additions & 0 deletions R/auth.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Returns a set of HTTP headers for the given URL if (1) it belongs to a
# package repository; and (2) has credentials stored in the keyring.
repo_auth_headers <- function(url, allow_prompt = interactive()) {
if (!grepl("/(src|bin)/", url)) {
# Not a package or package index URL.
return(NULL)
}
if (!rlang::is_installed("keyring")) {
return(NULL)
}
creds <- extract_basic_auth_credentials(url)
if (!is.null(creds$password)) {
# The URL already contains a password. This is pretty poor practice, maybe
# we should issue a warning pointing users to the keyring package instead.
return(NULL)
}
if (is.null(creds$username)) {
# No username to key the lookup in the keyring with.
return(NULL)
}

# In non-interactive contexts, force the use of the environment variable
# backend so that we never hang but can still support CI setups.
backend <- keyring::backend_env
if (allow_prompt) {
backend <- keyring::default_backend()
}
kb <- backend$new()

# Use the repo URL without the username as the keyring "service".
svc <- extract_repo_url(url)
pwd <- NULL
tryCatch(
{
pwd <- kb$get(svc, creds$username)
},
error = function(e) NULL
)

# Check whether we have one for the hostname as well.
svc <- extract_hostname(url)
tryCatch(
{
pwd <- kb$get(svc, creds$username)
},
error = function(e) NULL
)

if (is.null(pwd)) {
return(NULL)
}

auth <- paste(creds$username, pwd, sep = ":")
c("Authorization" = paste("Basic", openssl::base64_encode(auth)))
}

extract_basic_auth_credentials <- function(url) {
pattern <- "^https?://(?:([^:@/]+)(?::([^@/]+))?@)?.*$"
if (!grepl(pattern, url, perl = TRUE)) {
cli::cli_abort("Unrecognized URL format: {.url {url}}.", .internal = TRUE)
}
username <- sub(pattern, "\\1", url, perl = TRUE)
if (!nchar(username)) {
username <- NULL
}
password <- sub(pattern, "\\2", url, perl = TRUE)
if (!nchar(password)) {
password <- NULL
}
list(username = username, password = password)
}

extract_repo_url <- function(url) {
url <- sub(
"^(https?://)(?:[^:@/]+(?::[^@/]+)?@)?(.*)(/(src|bin)/)(.*)$",
"\\1\\2",
url,
perl = TRUE
)
# Lop off any /__linux__/ subdirectories, too.
sub("^(.*)/__linux__/[^/]+(/.*)$", "\\1\\2", url, perl = TRUE)
}

extract_hostname <- function(url) {
sub(
"^(https?://)(?:[^:@/]+(?::[^@/]+)?@)?([^/]+)(.*)",
"\\1\\2",
url,
perl = TRUE
)
}
9 changes: 7 additions & 2 deletions R/metadata-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -737,12 +737,17 @@ cmc__update_replica_pkgs <- function(self, private) {

meta <- !is.na(pkgs$meta_url)
bin <- !is.na(pkgs$bin_url)
dls <- data.frame(
stringsAsFactors = FALSE,
dls <- data_frame(
url = c(pkgs$url, pkgs$meta_url[meta], pkgs$bin_url[bin], bsq_url),
fallback_url = c(pkgs$fallback_url, rep(NA_character_, sum(meta) + sum(bin)), NA_character_),
path = c(pkgs$path, pkgs$meta_path[meta], pkgs$bin_path[bin], bsq_path),
etag = c(pkgs$etag, pkgs$meta_etag[meta], pkgs$bin_etag[bin], bsq_etag),
headers = c(
lapply(pkgs$url, repo_auth_headers),
vector("list", length = sum(meta)),
lapply(pkgs$bin_url[bin], repo_auth_headers),
vector("list", length = 1)
),
timeout = c(rep(c(200, 100), c(nrow(pkgs), sum(meta) + sum(bin))), 5),
mayfail = TRUE
)
Expand Down
8 changes: 4 additions & 4 deletions R/package-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ package_cache <- R6Class(
self; private; url; path; list(...); .list; on_progress; http_headers
target <- tempfile()
download_file(url, target, on_progress = on_progress,
headers = http_headers)$
headers = c(http_headers, repo_auth_headers(url)))$
then(function(res) {
headers <- curl::parse_headers(res$response$headers, multiple = TRUE)
self$add(target, path, url = url, etag = res$etag, ...,
Expand Down Expand Up @@ -209,7 +209,7 @@ package_cache <- R6Class(
then(function(res) {
if (! nrow(res)) {
download_one_of(urls, target, on_progress = on_progress,
headers = http_headers)$
headers = c(http_headers, repo_auth_headers(urls[1])))$
then(function(d) {
headers <- curl::parse_headers(d$response$headers, multiple = TRUE)
sha256 <- shasum256(target)
Expand Down Expand Up @@ -249,7 +249,7 @@ package_cache <- R6Class(
if (! nrow(res)) {
## Not in the cache, download and add it
download_one_of(urls, target, on_progress = on_progress,
headers = http_headers)$
headers = c(http_headers, repo_auth_headers(urls[1])))$
then(function(d) {
headers <- curl::parse_headers(d$response$headers, multiple = TRUE)
sha256 <- shasum256(target)
Expand All @@ -263,7 +263,7 @@ package_cache <- R6Class(
cat(res$etag, file = etag <- tempfile())
download_one_of(urls, target, etag_file = etag,
on_progress = on_progress,
headers = http_headers)$
headers = c(http_headers, repo_auth_headers(urls[1])))$
then(function(d) {
if (d$response$status_code != 304) {
## No current, update it
Expand Down
85 changes: 85 additions & 0 deletions tests/testthat/test-auth.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
test_that("looking up auth headers for repositories works as expected", {
skip_if_not_installed("keyring")

# No package directory in the URL.
expect_null(repo_auth_headers("https://[email protected]/healthz"))

# The URL already contains a password.
expect_null(
repo_auth_headers(
"https://username:[email protected]/cran/latest/src/contrib/PACKAGES.gz"
)
)

# No username in the repo URL.
expect_null(
repo_auth_headers(
"https://ppm.internal/cran/latest/src/contrib/PACKAGES.gz"
)
)

# Verify that the environment variable keyring backend is picked up correctly.
withr::with_envvar(
c("https://ppm.internal/cran/latest:username" = "token"),
expect_equal(
repo_auth_headers(
"https://[email protected]/cran/__linux__/jammy/latest/src/contrib/PACKAGES.gz",
allow_prompt = FALSE
),
c("Authorization" = "Basic dXNlcm5hbWU6dG9rZW4=")
)
)

# Verify that we fall back to checking for a hostname credential when none
# is available for a specific repo.
withr::with_envvar(
c("https://ppm.internal:username" = "token"),
expect_equal(
repo_auth_headers(
"https://[email protected]/cran/latest/bin/linux/4.4-jammy/contrib/4.4/PACKAGES.gz",
allow_prompt = FALSE
),
c("Authorization" = "Basic dXNlcm5hbWU6dG9rZW4=")
)
)
})

test_that("basic auth credentials can be extracted from various URL formats", {
expect_equal(
extract_basic_auth_credentials("https://user.name:[email protected]"),
list(username = "user.name", password = "pass-word123")
)
expect_equal(
extract_basic_auth_credentials("http://[email protected]"),
list(username = "user", password = NULL)
)
expect_equal(
extract_basic_auth_credentials("https://example.com"),
list(username = NULL, password = NULL)
)
expect_error(
extract_basic_auth_credentials("notaurl"),
"Unrecognized URL format"
)
})

test_that("we can extract hostnames and repository URLs from package URLs", {
expect_equal(
extract_repo_url(
"https://[email protected]/cran/__linux__/jammy/latest/src/contrib/PACKAGES.gz"
),
"https://ppm.internal/cran/latest"
)
expect_equal(
extract_repo_url(
"https://[email protected]/cran/latest/bin/linux/4.4-jammy/contrib/pkg.tar.gz"
),
"https://ppm.internal/cran/latest"
)
expect_equal(
extract_hostname(
"https://[email protected]/cran/latest/__linux__/jammy/src/contrib/PACKAGES.gz"
),
"https://ppm.internal"
)
})

0 comments on commit 9b23bdd

Please sign in to comment.