From 1af6e0f87817c38dd962094953ef092f7a4f3087 Mon Sep 17 00:00:00 2001 From: Kai Aragaki Date: Sat, 4 Nov 2023 16:28:21 -0400 Subject: [PATCH] test: unify outlier column checking and gen --- R/check.R | 11 +++++++++++ R/qp_calc_conc.R | 37 +++++++++++++----------------------- R/qp_fit.R | 8 +------- tests/testthat/test-qp_fit.R | 3 +-- 4 files changed, 26 insertions(+), 33 deletions(-) diff --git a/R/check.R b/R/check.R index 41bc38d..7c01e8d 100644 --- a/R/check.R +++ b/R/check.R @@ -70,3 +70,14 @@ check_pred_conc_mean <- function(x) { # It's possible for predicted values to be negative if (!is.numeric(x)) rlang::abort("`.pred_conc_mean` is not `numeric`") } + +provide_outliers_if_none <- function(x, ignore) { + if (!has_cols(x, ".is_outlier")) { + cli::cli_inform( + c("Did not find column `.is_outlier`", + "Running `qp_mark_outliers` with {.code ignore_outliers = {ignore}}") + ) + x <- qp_mark_outliers(x, ignore_outliers = ignore) + } + x +} diff --git a/R/qp_calc_conc.R b/R/qp_calc_conc.R index b321468..b768097 100644 --- a/R/qp_calc_conc.R +++ b/R/qp_calc_conc.R @@ -23,40 +23,29 @@ qp_calc_conc <- function(x, group_cols = c("sample_type", "index")) { if (is.data.frame(x)) { rlang::warn(c( - "The supplied data was a data.frame, not a list", - "Attempting to calculate a fit using supplied data.frame" + "`x` is a `data.frame`, not a `list`", + "Trying to apply `qp_fit` to `x`" )) - x <- list(fit = qp_fit(x), qp = x) + x <- qp_fit(x) } + group_to_ignore <- ifelse(ignore_outliers, "all", "none") + qp <- x$qp fit <- x$fit check_has_cols(qp, c(group_cols, all.vars(stats::terms(fit))[-1])) + with_predictions <- dplyr::bind_cols(qp, .pred = stats::predict(fit, qp)) - conc <- with_predictions |> + x <- with_predictions |> dplyr::mutate(.pred_conc = 2^(.data$.pred) - 0.5) |> - dplyr::group_by(dplyr::across(dplyr::any_of(group_cols))) - - if (ignore_outliers) { - if (!".is_outlier" %in% colnames(conc)) { - rlang::inform( - "No colname of `.is_outlier` supplied. Calculating outliers." - ) - conc <- dplyr::mutate(conc, .is_outlier = mark_outlier(.data$.pred)) - } - conc <- dplyr::mutate( - conc, + dplyr::group_by(dplyr::across(dplyr::any_of(group_cols))) |> + provide_outliers_if_none(group_to_ignore) |> + dplyr::mutate( .pred_conc_mean = mean( .data$.pred_conc[which(f_or_na(.data$.is_outlier))], na.rm = TRUE ) - ) - } else { - conc <- dplyr::mutate( - conc, - .pred_conc_mean = mean(.data$.pred_conc, na.rm = TRUE) - ) - } - conc <- dplyr::ungroup(conc) - list(fit = fit, qp = conc) + ) |> + dplyr::ungroup() + list(fit = fit, qp = x) } diff --git a/R/qp_fit.R b/R/qp_fit.R index 8eba25f..98e5ed6 100644 --- a/R/qp_fit.R +++ b/R/qp_fit.R @@ -37,13 +37,7 @@ qp_fit.data.frame <- function(x) { x$.log2_abs <- log2(x$.abs) } - if (!has_cols(x, ".is_outlier")) { - rlang::inform( - c("Did not find column `.is_outlier`, fitting with all standards.", - "i" = "To remove outliers, set `ignore_outliers` in `qp_calc_abs_mean`") - ) - x$.is_outlier <- NA - } + x <- provide_outliers_if_none(x, "all") check_has_cols(x, c("sample_type", ".conc")) check_sample_type(x$sample_type) diff --git a/tests/testthat/test-qp_fit.R b/tests/testthat/test-qp_fit.R index cc710cc..498e4c9 100644 --- a/tests/testthat/test-qp_fit.R +++ b/tests/testthat/test-qp_fit.R @@ -15,7 +15,6 @@ test_that(".is_outlier is produced if not present", { x$.log2_abs <- log2(x$.abs) x <- qp_add_std_conc(x) expect_message(qp_fit(x), "Did not find column `\\.is_outlier`") - expect_true(all(is.na(qp_fit(x)$qp$.is_outlier))) }) test_that("fit is consistent", { @@ -27,5 +26,5 @@ test_that("fit is consistent", { ) ) coefs <- round(fit$fit$coefficients, 2) - expect_true(all(coefs == c(2.38, 0.86))) + expect_true(all(coefs == c(2.38, 0.85))) })