From e583e94644fe27c2fa9001dff8928eadc99c7fc7 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 30 Oct 2024 17:02:21 -0700 Subject: [PATCH 1/3] add rlang type checkers --- DESCRIPTION | 2 +- R/discretize_cart.R | 2 +- R/discretize_xgb.R | 2 +- R/embed.R | 2 +- R/feature_hash.R | 2 +- R/import-standalone-obj-type.R | 364 ++++++++++++++++++++ R/import-standalone-types-check.R | 554 ++++++++++++++++++++++++++++++ R/pca_sparse.R | 2 +- R/pca_sparse_bayes.R | 2 +- R/pca_truncated.R | 2 +- R/umap.R | 2 +- 11 files changed, 927 insertions(+), 9 deletions(-) create mode 100644 R/import-standalone-obj-type.R create mode 100644 R/import-standalone-types-check.R diff --git a/DESCRIPTION b/DESCRIPTION index 9d01126..09f3d9f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,7 @@ Imports: generics (>= 0.1.0), lifecycle, purrr, - rlang (>= 0.4.10), + rlang (>= 1.1.0), rsample, stats, tibble, diff --git a/R/discretize_cart.R b/R/discretize_cart.R index a0d7748..e3c5df1 100644 --- a/R/discretize_cart.R +++ b/R/discretize_cart.R @@ -261,7 +261,7 @@ bake.step_discretize_cart <- function(object, new_data, ...) { dig.lab = 4 ) - check_name(binned_data, new_data, object) + recipes::check_name(binned_data, new_data, object) new_data <- binned_data } } diff --git a/R/discretize_xgb.R b/R/discretize_xgb.R index 4e5a40b..e98aa95 100644 --- a/R/discretize_xgb.R +++ b/R/discretize_xgb.R @@ -496,7 +496,7 @@ bake.step_discretize_xgb <- function(object, new_data, ...) { dig.lab = 4 ) - check_name(binned_data, new_data, object) + recipes::check_name(binned_data, new_data, object) new_data <- binned_data } } diff --git a/R/embed.R b/R/embed.R index f01f0b5..ca69039 100644 --- a/R/embed.R +++ b/R/embed.R @@ -429,7 +429,7 @@ bake.step_embed <- function(object, new_data, ...) { prefix = col_name ) - tmp <- check_name(tmp, new_data, object, names(tmp)) + tmp <- recipes::check_name(tmp, new_data, object, names(tmp)) new_data <- vec_cbind(new_data, tmp) } diff --git a/R/feature_hash.R b/R/feature_hash.R index e67790a..14d62c2 100644 --- a/R/feature_hash.R +++ b/R/feature_hash.R @@ -222,7 +222,7 @@ bake.step_feature_hash <- function(object, new_data, ...) { object$num_hash ) - new_cols <- check_name(new_cols, new_data, object, names(new_cols)) + new_cols <- recipes::check_name(new_cols, new_data, object, names(new_cols)) new_data <- vec_cbind(new_data, new_cols) diff --git a/R/import-standalone-obj-type.R b/R/import-standalone-obj-type.R new file mode 100644 index 0000000..47268d6 --- /dev/null +++ b/R/import-standalone-obj-type.R @@ -0,0 +1,364 @@ +# Standalone file: do not edit by hand +# Source: https://github.com/r-lib/rlang/blob/HEAD/R/standalone-obj-type.R +# Generated by: usethis::use_standalone("r-lib/rlang", "obj-type") +# ---------------------------------------------------------------------- +# +# --- +# repo: r-lib/rlang +# file: standalone-obj-type.R +# last-updated: 2024-02-14 +# license: https://unlicense.org +# imports: rlang (>= 1.1.0) +# --- +# +# ## Changelog +# +# 2024-02-14: +# - `obj_type_friendly()` now works for S7 objects. +# +# 2023-05-01: +# - `obj_type_friendly()` now only displays the first class of S3 objects. +# +# 2023-03-30: +# - `stop_input_type()` now handles `I()` input literally in `arg`. +# +# 2022-10-04: +# - `obj_type_friendly(value = TRUE)` now shows numeric scalars +# literally. +# - `stop_friendly_type()` now takes `show_value`, passed to +# `obj_type_friendly()` as the `value` argument. +# +# 2022-10-03: +# - Added `allow_na` and `allow_null` arguments. +# - `NULL` is now backticked. +# - Better friendly type for infinities and `NaN`. +# +# 2022-09-16: +# - Unprefixed usage of rlang functions with `rlang::` to +# avoid onLoad issues when called from rlang (#1482). +# +# 2022-08-11: +# - Prefixed usage of rlang functions with `rlang::`. +# +# 2022-06-22: +# - `friendly_type_of()` is now `obj_type_friendly()`. +# - Added `obj_type_oo()`. +# +# 2021-12-20: +# - Added support for scalar values and empty vectors. +# - Added `stop_input_type()` +# +# 2021-06-30: +# - Added support for missing arguments. +# +# 2021-04-19: +# - Added support for matrices and arrays (#141). +# - Added documentation. +# - Added changelog. +# +# nocov start + +#' Return English-friendly type +#' @param x Any R object. +#' @param value Whether to describe the value of `x`. Special values +#' like `NA` or `""` are always described. +#' @param length Whether to mention the length of vectors and lists. +#' @return A string describing the type. Starts with an indefinite +#' article, e.g. "an integer vector". +#' @noRd +obj_type_friendly <- function(x, value = TRUE) { + if (is_missing(x)) { + return("absent") + } + + if (is.object(x)) { + if (inherits(x, "quosure")) { + type <- "quosure" + } else { + type <- class(x)[[1L]] + } + return(sprintf("a <%s> object", type)) + } + + if (!is_vector(x)) { + return(.rlang_as_friendly_type(typeof(x))) + } + + n_dim <- length(dim(x)) + + if (!n_dim) { + if (!is_list(x) && length(x) == 1) { + if (is_na(x)) { + return(switch( + typeof(x), + logical = "`NA`", + integer = "an integer `NA`", + double = + if (is.nan(x)) { + "`NaN`" + } else { + "a numeric `NA`" + }, + complex = "a complex `NA`", + character = "a character `NA`", + .rlang_stop_unexpected_typeof(x) + )) + } + + show_infinites <- function(x) { + if (x > 0) { + "`Inf`" + } else { + "`-Inf`" + } + } + str_encode <- function(x, width = 30, ...) { + if (nchar(x) > width) { + x <- substr(x, 1, width - 3) + x <- paste0(x, "...") + } + encodeString(x, ...) + } + + if (value) { + if (is.numeric(x) && is.infinite(x)) { + return(show_infinites(x)) + } + + if (is.numeric(x) || is.complex(x)) { + number <- as.character(round(x, 2)) + what <- if (is.complex(x)) "the complex number" else "the number" + return(paste(what, number)) + } + + return(switch( + typeof(x), + logical = if (x) "`TRUE`" else "`FALSE`", + character = { + what <- if (nzchar(x)) "the string" else "the empty string" + paste(what, str_encode(x, quote = "\"")) + }, + raw = paste("the raw value", as.character(x)), + .rlang_stop_unexpected_typeof(x) + )) + } + + return(switch( + typeof(x), + logical = "a logical value", + integer = "an integer", + double = if (is.infinite(x)) show_infinites(x) else "a number", + complex = "a complex number", + character = if (nzchar(x)) "a string" else "\"\"", + raw = "a raw value", + .rlang_stop_unexpected_typeof(x) + )) + } + + if (length(x) == 0) { + return(switch( + typeof(x), + logical = "an empty logical vector", + integer = "an empty integer vector", + double = "an empty numeric vector", + complex = "an empty complex vector", + character = "an empty character vector", + raw = "an empty raw vector", + list = "an empty list", + .rlang_stop_unexpected_typeof(x) + )) + } + } + + vec_type_friendly(x) +} + +vec_type_friendly <- function(x, length = FALSE) { + if (!is_vector(x)) { + abort("`x` must be a vector.") + } + type <- typeof(x) + n_dim <- length(dim(x)) + + add_length <- function(type) { + if (length && !n_dim) { + paste0(type, sprintf(" of length %s", length(x))) + } else { + type + } + } + + if (type == "list") { + if (n_dim < 2) { + return(add_length("a list")) + } else if (is.data.frame(x)) { + return("a data frame") + } else if (n_dim == 2) { + return("a list matrix") + } else { + return("a list array") + } + } + + type <- switch( + type, + logical = "a logical %s", + integer = "an integer %s", + numeric = , + double = "a double %s", + complex = "a complex %s", + character = "a character %s", + raw = "a raw %s", + type = paste0("a ", type, " %s") + ) + + if (n_dim < 2) { + kind <- "vector" + } else if (n_dim == 2) { + kind <- "matrix" + } else { + kind <- "array" + } + out <- sprintf(type, kind) + + if (n_dim >= 2) { + out + } else { + add_length(out) + } +} + +.rlang_as_friendly_type <- function(type) { + switch( + type, + + list = "a list", + + NULL = "`NULL`", + environment = "an environment", + externalptr = "a pointer", + weakref = "a weak reference", + S4 = "an S4 object", + + name = , + symbol = "a symbol", + language = "a call", + pairlist = "a pairlist node", + expression = "an expression vector", + + char = "an internal string", + promise = "an internal promise", + ... = "an internal dots object", + any = "an internal `any` object", + bytecode = "an internal bytecode object", + + primitive = , + builtin = , + special = "a primitive function", + closure = "a function", + + type + ) +} + +.rlang_stop_unexpected_typeof <- function(x, call = caller_env()) { + abort( + sprintf("Unexpected type <%s>.", typeof(x)), + call = call + ) +} + +#' Return OO type +#' @param x Any R object. +#' @return One of `"bare"` (for non-OO objects), `"S3"`, `"S4"`, +#' `"R6"`, or `"S7"`. +#' @noRd +obj_type_oo <- function(x) { + if (!is.object(x)) { + return("bare") + } + + class <- inherits(x, c("R6", "S7_object"), which = TRUE) + + if (class[[1]]) { + "R6" + } else if (class[[2]]) { + "S7" + } else if (isS4(x)) { + "S4" + } else { + "S3" + } +} + +#' @param x The object type which does not conform to `what`. Its +#' `obj_type_friendly()` is taken and mentioned in the error message. +#' @param what The friendly expected type as a string. Can be a +#' character vector of expected types, in which case the error +#' message mentions all of them in an "or" enumeration. +#' @param show_value Passed to `value` argument of `obj_type_friendly()`. +#' @param ... Arguments passed to [abort()]. +#' @inheritParams args_error_context +#' @noRd +stop_input_type <- function(x, + what, + ..., + allow_na = FALSE, + allow_null = FALSE, + show_value = TRUE, + arg = caller_arg(x), + call = caller_env()) { + # From standalone-cli.R + cli <- env_get_list( + nms = c("format_arg", "format_code"), + last = topenv(), + default = function(x) sprintf("`%s`", x), + inherit = TRUE + ) + + if (allow_na) { + what <- c(what, cli$format_code("NA")) + } + if (allow_null) { + what <- c(what, cli$format_code("NULL")) + } + if (length(what)) { + what <- oxford_comma(what) + } + if (inherits(arg, "AsIs")) { + format_arg <- identity + } else { + format_arg <- cli$format_arg + } + + message <- sprintf( + "%s must be %s, not %s.", + format_arg(arg), + what, + obj_type_friendly(x, value = show_value) + ) + + abort(message, ..., call = call, arg = arg) +} + +oxford_comma <- function(chr, sep = ", ", final = "or") { + n <- length(chr) + + if (n < 2) { + return(chr) + } + + head <- chr[seq_len(n - 1)] + last <- chr[n] + + head <- paste(head, collapse = sep) + + # Write a or b. But a, b, or c. + if (n > 2) { + paste0(head, sep, final, " ", last) + } else { + paste0(head, " ", final, " ", last) + } +} + +# nocov end diff --git a/R/import-standalone-types-check.R b/R/import-standalone-types-check.R new file mode 100644 index 0000000..ef8c5a1 --- /dev/null +++ b/R/import-standalone-types-check.R @@ -0,0 +1,554 @@ +# Standalone file: do not edit by hand +# Source: https://github.com/r-lib/rlang/blob/HEAD/R/standalone-types-check.R +# Generated by: usethis::use_standalone("r-lib/rlang", "types-check") +# ---------------------------------------------------------------------- +# +# --- +# repo: r-lib/rlang +# file: standalone-types-check.R +# last-updated: 2023-03-13 +# license: https://unlicense.org +# dependencies: standalone-obj-type.R +# imports: rlang (>= 1.1.0) +# --- +# +# ## Changelog +# +# 2024-08-15: +# - `check_character()` gains an `allow_na` argument (@martaalcalde, #1724) +# +# 2023-03-13: +# - Improved error messages of number checkers (@teunbrand) +# - Added `allow_infinite` argument to `check_number_whole()` (@mgirlich). +# - Added `check_data_frame()` (@mgirlich). +# +# 2023-03-07: +# - Added dependency on rlang (>= 1.1.0). +# +# 2023-02-15: +# - Added `check_logical()`. +# +# - `check_bool()`, `check_number_whole()`, and +# `check_number_decimal()` are now implemented in C. +# +# - For efficiency, `check_number_whole()` and +# `check_number_decimal()` now take a `NULL` default for `min` and +# `max`. This makes it possible to bypass unnecessary type-checking +# and comparisons in the default case of no bounds checks. +# +# 2022-10-07: +# - `check_number_whole()` and `_decimal()` no longer treat +# non-numeric types such as factors or dates as numbers. Numeric +# types are detected with `is.numeric()`. +# +# 2022-10-04: +# - Added `check_name()` that forbids the empty string. +# `check_string()` allows the empty string by default. +# +# 2022-09-28: +# - Removed `what` arguments. +# - Added `allow_na` and `allow_null` arguments. +# - Added `allow_decimal` and `allow_infinite` arguments. +# - Improved errors with absent arguments. +# +# +# 2022-09-16: +# - Unprefixed usage of rlang functions with `rlang::` to +# avoid onLoad issues when called from rlang (#1482). +# +# 2022-08-11: +# - Added changelog. +# +# nocov start + +# Scalars ----------------------------------------------------------------- + +.standalone_types_check_dot_call <- .Call + +check_bool <- function(x, + ..., + allow_na = FALSE, + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x) && .standalone_types_check_dot_call(ffi_standalone_is_bool_1.0.7, x, allow_na, allow_null)) { + return(invisible(NULL)) + } + + stop_input_type( + x, + c("`TRUE`", "`FALSE`"), + ..., + allow_na = allow_na, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_string <- function(x, + ..., + allow_empty = TRUE, + allow_na = FALSE, + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + is_string <- .rlang_check_is_string( + x, + allow_empty = allow_empty, + allow_na = allow_na, + allow_null = allow_null + ) + if (is_string) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a single string", + ..., + allow_na = allow_na, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +.rlang_check_is_string <- function(x, + allow_empty, + allow_na, + allow_null) { + if (is_string(x)) { + if (allow_empty || !is_string(x, "")) { + return(TRUE) + } + } + + if (allow_null && is_null(x)) { + return(TRUE) + } + + if (allow_na && (identical(x, NA) || identical(x, na_chr))) { + return(TRUE) + } + + FALSE +} + +check_name <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + is_string <- .rlang_check_is_string( + x, + allow_empty = FALSE, + allow_na = FALSE, + allow_null = allow_null + ) + if (is_string) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a valid name", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +IS_NUMBER_true <- 0 +IS_NUMBER_false <- 1 +IS_NUMBER_oob <- 2 + +check_number_decimal <- function(x, + ..., + min = NULL, + max = NULL, + allow_infinite = TRUE, + allow_na = FALSE, + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (missing(x)) { + exit_code <- IS_NUMBER_false + } else if (0 == (exit_code <- .standalone_types_check_dot_call( + ffi_standalone_check_number_1.0.7, + x, + allow_decimal = TRUE, + min, + max, + allow_infinite, + allow_na, + allow_null + ))) { + return(invisible(NULL)) + } + + .stop_not_number( + x, + ..., + exit_code = exit_code, + allow_decimal = TRUE, + min = min, + max = max, + allow_na = allow_na, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_number_whole <- function(x, + ..., + min = NULL, + max = NULL, + allow_infinite = FALSE, + allow_na = FALSE, + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (missing(x)) { + exit_code <- IS_NUMBER_false + } else if (0 == (exit_code <- .standalone_types_check_dot_call( + ffi_standalone_check_number_1.0.7, + x, + allow_decimal = FALSE, + min, + max, + allow_infinite, + allow_na, + allow_null + ))) { + return(invisible(NULL)) + } + + .stop_not_number( + x, + ..., + exit_code = exit_code, + allow_decimal = FALSE, + min = min, + max = max, + allow_na = allow_na, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +.stop_not_number <- function(x, + ..., + exit_code, + allow_decimal, + min, + max, + allow_na, + allow_null, + arg, + call) { + if (allow_decimal) { + what <- "a number" + } else { + what <- "a whole number" + } + + if (exit_code == IS_NUMBER_oob) { + min <- min %||% -Inf + max <- max %||% Inf + + if (min > -Inf && max < Inf) { + what <- sprintf("%s between %s and %s", what, min, max) + } else if (x < min) { + what <- sprintf("%s larger than or equal to %s", what, min) + } else if (x > max) { + what <- sprintf("%s smaller than or equal to %s", what, max) + } else { + abort("Unexpected state in OOB check", .internal = TRUE) + } + } + + stop_input_type( + x, + what, + ..., + allow_na = allow_na, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_symbol <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is_symbol(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a symbol", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_arg <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is_symbol(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "an argument name", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_call <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is_call(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a defused call", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_environment <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is_environment(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "an environment", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_function <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is_function(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a function", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_closure <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is_closure(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "an R function", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_formula <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is_formula(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a formula", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + + +# Vectors ----------------------------------------------------------------- + +# TODO: Figure out what to do with logical `NA` and `allow_na = TRUE` + +check_character <- function(x, + ..., + allow_na = TRUE, + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + + if (!missing(x)) { + if (is_character(x)) { + if (!allow_na && any(is.na(x))) { + abort( + sprintf("`%s` can't contain NA values.", arg), + arg = arg, + call = call + ) + } + + return(invisible(NULL)) + } + + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a character vector", + ..., + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_logical <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is_logical(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a logical vector", + ..., + allow_na = FALSE, + allow_null = allow_null, + arg = arg, + call = call + ) +} + +check_data_frame <- function(x, + ..., + allow_null = FALSE, + arg = caller_arg(x), + call = caller_env()) { + if (!missing(x)) { + if (is.data.frame(x)) { + return(invisible(NULL)) + } + if (allow_null && is_null(x)) { + return(invisible(NULL)) + } + } + + stop_input_type( + x, + "a data frame", + ..., + allow_null = allow_null, + arg = arg, + call = call + ) +} + +# nocov end diff --git a/R/pca_sparse.R b/R/pca_sparse.R index 71db0ce..b9f212e 100644 --- a/R/pca_sparse.R +++ b/R/pca_sparse.R @@ -211,7 +211,7 @@ bake.step_pca_sparse <- function(object, new_data, ...) { x <- as.matrix(new_data[, pca_vars]) comps <- x %*% object$res comps <- as_tibble(comps) - comps <- check_name(comps, new_data, object) + comps <- recipes::check_name(comps, new_data, object) new_data <- vec_cbind(new_data, comps) new_data <- remove_original_cols(new_data, object, pca_vars) diff --git a/R/pca_sparse_bayes.R b/R/pca_sparse_bayes.R index 45b00a7..376b70f 100644 --- a/R/pca_sparse_bayes.R +++ b/R/pca_sparse_bayes.R @@ -234,7 +234,7 @@ bake.step_pca_sparse_bayes <- function(object, new_data, ...) { x <- as.matrix(new_data[, pca_vars]) comps <- x %*% object$res comps <- as_tibble(comps) - comps <- check_name(comps, new_data, object) + comps <- recipes::check_name(comps, new_data, object) new_data <- vec_cbind(new_data, comps) new_data <- remove_original_cols(new_data, object, pca_vars) diff --git a/R/pca_truncated.R b/R/pca_truncated.R index 626d2cd..54aa829 100644 --- a/R/pca_truncated.R +++ b/R/pca_truncated.R @@ -208,7 +208,7 @@ bake.step_pca_truncated <- function(object, new_data, ...) { object$res$rotation comps <- comps[, 1:object$num_comp, drop = FALSE] comps <- as_tibble(comps) - comps <- check_name(comps, new_data, object) + comps <- recipes::check_name(comps, new_data, object) new_data <- vec_cbind(new_data, comps) new_data <- remove_original_cols(new_data, object, pca_vars) diff --git a/R/umap.R b/R/umap.R index 7986176..316fb4a 100644 --- a/R/umap.R +++ b/R/umap.R @@ -304,7 +304,7 @@ bake.step_umap <- function(object, new_data, ...) { colnames(res) <- names0(object$num_comp, prefix = object$prefix) res <- as_tibble(res) - res <- check_name(res, new_data, object, names(res)) + res <- recipes::check_name(res, new_data, object, names(res)) new_data <- vec_cbind(new_data, res) new_data <- remove_original_cols(new_data, object, col_names) From b05d073cd18d01e01aef36f4b9c1091935f11d64 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Thu, 7 Nov 2024 13:43:37 -0800 Subject: [PATCH 2/3] add input type checkers to all steps --- R/collapse_cart.R | 3 + R/collapse_stringdist.R | 5 +- R/discretize_cart.R | 4 + R/discretize_xgb.R | 6 + R/embed.R | 3 + R/feature_hash.R | 2 + R/lencode_bayes.R | 3 + R/pca_sparse.R | 7 ++ R/pca_sparse_bayes.R | 11 +- R/pca_truncated.R | 4 + R/umap.R | 23 +++- R/woe.R | 4 + tests/testthat/_snaps/collapse_cart.md | 16 +++ tests/testthat/_snaps/collapse_stringdist.md | 16 +++ tests/testthat/_snaps/discretize_cart.md | 30 +++++ tests/testthat/_snaps/discretize_xgb.md | 50 +++++++++ tests/testthat/_snaps/embed.md | 20 ++++ tests/testthat/_snaps/feature_hash.md | 12 ++ tests/testthat/_snaps/lencode_bayes.md | 8 ++ tests/testthat/_snaps/pca_sparse.md | 26 +++++ tests/testthat/_snaps/pca_sparse_bayes.md | 37 +++++++ tests/testthat/_snaps/pca_truncated.md | 17 +++ tests/testthat/_snaps/umap.md | 111 ++++++++++++------- tests/testthat/_snaps/woe.md | 18 +++ tests/testthat/test-collapse_cart.R | 13 +++ tests/testthat/test-collapse_stringdist.R | 15 +++ tests/testthat/test-discretize_cart.R | 22 ++++ tests/testthat/test-discretize_xgb.R | 35 ++++++ tests/testthat/test-embed.R | 15 +++ tests/testthat/test-feature_hash.R | 9 ++ tests/testthat/test-lencode_bayes.R | 8 ++ tests/testthat/test-pca_sparse.R | 20 ++++ tests/testthat/test-pca_sparse_bayes.R | 26 +++++ tests/testthat/test-pca_truncated.R | 14 +++ tests/testthat/test-umap.R | 49 ++++++++ tests/testthat/test-woe.R | 14 +++ 36 files changed, 630 insertions(+), 46 deletions(-) diff --git a/R/collapse_cart.R b/R/collapse_cart.R index 1815349..10201ad 100644 --- a/R/collapse_cart.R +++ b/R/collapse_cart.R @@ -80,6 +80,9 @@ step_collapse_cart <- id = rand_id("step_collapse_cart")) { recipes_pkg_check(required_pkgs.step_discretize_cart()) + check_number_decimal(cost_complexity, min = 0) + check_number_whole(min_n, min = 1) + add_step( recipe, step_collapse_cart_new( diff --git a/R/collapse_stringdist.R b/R/collapse_stringdist.R index eb58b57..58cd0e7 100644 --- a/R/collapse_stringdist.R +++ b/R/collapse_stringdist.R @@ -78,9 +78,8 @@ step_collapse_stringdist <- columns = NULL, skip = FALSE, id = rand_id("collapse_stringdist")) { - if (is.null(distance)) { - cli::cli_abort("The {.arg distance} argument must be set.") - } + check_number_decimal(distance, min = 0) + check_string(method) add_step( recipe, diff --git a/R/discretize_cart.R b/R/discretize_cart.R index e3c5df1..6e636f4 100644 --- a/R/discretize_cart.R +++ b/R/discretize_cart.R @@ -192,6 +192,10 @@ cart_binning <- function(predictor, term, outcome, cost_complexity, tree_depth, prep.step_discretize_cart <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_decimal(x$cost_complexity, min = 0, arg = "cost_complexity") + check_number_decimal(x$tree_depth, min = 0, arg = "tree_depth") + check_number_decimal(x$min_n, min = 0, arg = "min_n") + wts <- get_case_weights(info, training) were_weights_used <- are_weights_used(wts) if (isFALSE(were_weights_used)) { diff --git a/R/discretize_xgb.R b/R/discretize_xgb.R index e98aa95..87daae0 100644 --- a/R/discretize_xgb.R +++ b/R/discretize_xgb.R @@ -392,6 +392,12 @@ xgb_binning <- function(df, outcome, predictor, sample_val, learn_rate, prep.step_discretize_xgb <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_decimal(x$sample_val, min = 0, max = 1, arg = "sample_val") + check_number_decimal(x$learn_rate, min = 0, arg = "learn_rate") + check_number_whole(x$num_breaks, min = 0, arg = "num_breaks") + check_number_whole(x$tree_depth, min = 0, arg = "tree_depth") + check_number_whole(x$min_n, min = 0, arg = "min_n") + wts <- get_case_weights(info, training) were_weights_used <- are_weights_used(wts) if (isFALSE(were_weights_used) || is.null(wts)) { diff --git a/R/embed.R b/R/embed.R index ca69039..58573c5 100644 --- a/R/embed.R +++ b/R/embed.R @@ -206,6 +206,9 @@ step_embed_new <- prep.step_embed <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_whole(x$num_terms, min = 0, arg = "num_terms") + check_number_whole(x$hidden_units, min = 0, arg = "hidden_units") + if (length(col_names) > 0) { check_type(training[, col_names], types = c("string", "factor", "ordered")) y_name <- recipes_eval_select(x$outcome, training, info) diff --git a/R/feature_hash.R b/R/feature_hash.R index 14d62c2..e5bb21c 100644 --- a/R/feature_hash.R +++ b/R/feature_hash.R @@ -138,6 +138,8 @@ step_feature_hash_new <- prep.step_feature_hash <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_whole(x$num_hash, min = 0, arg = "num_hash") + if (length(col_names) > 0) { check_type(training[, col_names], types = c("string", "factor", "ordered")) } diff --git a/R/lencode_bayes.R b/R/lencode_bayes.R index 7aad3e9..aaa3658 100644 --- a/R/lencode_bayes.R +++ b/R/lencode_bayes.R @@ -119,6 +119,9 @@ step_lencode_bayes <- if (is.null(outcome)) { cli::cli_abort("Please list a variable in {.code outcome}.") } + + check_bool(verbose) + add_step( recipe, step_lencode_bayes_new( diff --git a/R/pca_sparse.R b/R/pca_sparse.R index b9f212e..c8b813d 100644 --- a/R/pca_sparse.R +++ b/R/pca_sparse.R @@ -103,6 +103,8 @@ step_pca_sparse <- function(recipe, keep_original_cols = FALSE, skip = FALSE, id = rand_id("pca_sparse")) { + check_string(prefix) + add_step( recipe, step_pca_sparse_new( @@ -144,6 +146,11 @@ step_pca_sparse_new <- prep.step_pca_sparse <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_whole(x$num_comp, min = 0, arg = "num_comp") + check_number_decimal( + x$predictor_prop, min = 0, max = 1, arg = "predictor_prop" + ) + if (length(col_names) > 0 && x$num_comp > 0) { check_type(training[, col_names], types = c("double", "integer")) diff --git a/R/pca_sparse_bayes.R b/R/pca_sparse_bayes.R index 376b70f..76d6a55 100644 --- a/R/pca_sparse_bayes.R +++ b/R/pca_sparse_bayes.R @@ -124,7 +124,8 @@ step_pca_sparse_bayes <- function(recipe, keep_original_cols = FALSE, skip = FALSE, id = rand_id("pca_sparse_bayes")) { - + check_string(prefix) + add_step( recipe, step_pca_sparse_bayes_new( @@ -169,6 +170,14 @@ step_pca_sparse_bayes_new <- prep.step_pca_sparse_bayes <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_whole(x$num_comp, min = 0, arg = "num_comp") + check_number_decimal( + x$prior_slab_dispersion, min = 0, arg = "prior_slab_dispersion" + ) + check_number_decimal( + x$prior_mixture_threshold, min = 0, max = 1, arg = "prior_mixture_threshold" + ) + if (length(col_names) > 0) { check_type(training[, col_names], types = c("double", "integer")) diff --git a/R/pca_truncated.R b/R/pca_truncated.R index 54aa829..a1dc7bc 100644 --- a/R/pca_truncated.R +++ b/R/pca_truncated.R @@ -96,6 +96,8 @@ step_pca_truncated <- function(recipe, keep_original_cols = FALSE, skip = FALSE, id = rand_id("pca_truncated")) { + check_string(prefix) + add_step( recipe, step_pca_truncated_new( @@ -140,6 +142,8 @@ prep.step_pca_truncated <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) check_type(training[, col_names], types = c("double", "integer")) + check_number_whole(x$num_comp, min = 0, arg = "num_comp") + wts <- get_case_weights(info, training) were_weights_used <- are_weights_used(wts, unsupervised = TRUE) if (isFALSE(were_weights_used)) { diff --git a/R/umap.R b/R/umap.R index 316fb4a..6e978c8 100644 --- a/R/umap.R +++ b/R/umap.R @@ -137,6 +137,8 @@ step_umap <- keep_original_cols <- retain } + check_string(prefix) + recipes_pkg_check(required_pkgs.step_umap()) if (is.numeric(seed) && !is.integer(seed)) { seed <- as.integer(seed) @@ -229,6 +231,14 @@ umap_fit_call <- function(obj, y = NULL) { prep.step_umap <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_whole(x$num_comp, min = 0, arg = "num_comp") + check_number_whole(x$neighbors, min = 0, arg = "neighbors") + check_number_decimal(x$min_dist, arg = "min_dist") + check_number_decimal(x$learn_rate, min = 0, arg = "learn_rate") + check_number_whole(x$epochs, min = 0, allow_null = TRUE, arg = "epochs") + rlang::arg_match0(x$initial, initial_umap_values, arg_nm = "initial") + check_number_decimal(x$target_weight, min = 0, max = 1, arg = "target_weight") + if (length(col_names) > 0) { if (length(x$outcome) > 0) { y_name <- recipes_eval_select(x$outcome, training, info) @@ -355,7 +365,7 @@ tunable.step_umap <- function(x, ...) { list(pkg = "dials", fun = "min_dist", range = c(-4, -0.69897)), list(pkg = "dials", fun = "learn_rate"), list(pkg = "dials", fun = "epochs", range = c(100, 700)), - list(pkg = "dials", fun = "initial_umap", values = c("spectral", "normlaplacian", "random", "lvrandom", "laplacian", "pca", "spca", "agspectral")), + list(pkg = "dials", fun = "initial_umap", values = initial_umap_values), list(pkg = "dials", fun = "target_weight", range = c(0, 1)) ), source = "recipe", @@ -363,3 +373,14 @@ tunable.step_umap <- function(x, ...) { component_id = x$id ) } + +initial_umap_values <- c( + "spectral", + "normlaplacian", + "random", + "lvrandom", + "laplacian", + "pca", + "spca", + "agspectral" +) diff --git a/R/woe.R b/R/woe.R index d2a4e7e..83a3bdc 100644 --- a/R/woe.R +++ b/R/woe.R @@ -159,6 +159,8 @@ step_woe <- function(recipe, cli::cli_abort("The {.arg outcome} argument is missing, with no default.") } + check_string(prefix) + add_step( recipe, step_woe_new( @@ -423,6 +425,8 @@ add_woe <- function(.data, outcome, ..., dictionary = NULL, prefix = "woe") { prep.step_woe <- function(x, training, info = NULL, ...) { col_names <- recipes_eval_select(x$terms, training, info) + check_number_decimal(x$Laplace, arg = "Laplace") + if (length(col_names) > 0) { outcome_name <- recipes_eval_select(x$outcome, training, info) diff --git a/tests/testthat/_snaps/collapse_cart.md b/tests/testthat/_snaps/collapse_cart.md index 81f830e..bcba602 100644 --- a/tests/testthat/_snaps/collapse_cart.md +++ b/tests/testthat/_snaps/collapse_cart.md @@ -1,3 +1,19 @@ +# bad args + + Code + recipe(~., data = mtcars) %>% step_collapse_cart(cost_complexity = -4) + Condition + Error in `step_collapse_cart()`: + ! `cost_complexity` must be a number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_collapse_cart(min_n = -4) + Condition + Error in `step_collapse_cart()`: + ! `min_n` must be a whole number larger than or equal to 1, not the number -4. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/_snaps/collapse_stringdist.md b/tests/testthat/_snaps/collapse_stringdist.md index fbddb1f..b3bb9ae 100644 --- a/tests/testthat/_snaps/collapse_stringdist.md +++ b/tests/testthat/_snaps/collapse_stringdist.md @@ -1,3 +1,19 @@ +# bad args + + Code + recipe(~., data = mtcars) %>% step_collapse_stringdist(cost_complexity = -4) + Condition + Error in `step_collapse_stringdist()`: + ! `distance` must be a number, not `NULL`. + +--- + + Code + recipe(~., data = mtcars) %>% step_collapse_stringdist(min_n = -4) + Condition + Error in `step_collapse_stringdist()`: + ! `distance` must be a number, not `NULL`. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/_snaps/discretize_cart.md b/tests/testthat/_snaps/discretize_cart.md index 8125247..721b8d1 100644 --- a/tests/testthat/_snaps/discretize_cart.md +++ b/tests/testthat/_snaps/discretize_cart.md @@ -45,6 +45,36 @@ x All columns selected for the step should be double or integer. * 1 factor variable found: `w` +--- + + Code + recipe(~., data = mtcars) %>% step_discretize_cart(outcome = vars("mpg"), + cost_complexity = -4) %>% prep() + Condition + Error in `step_discretize_cart()`: + Caused by error in `prep()`: + ! `cost_complexity` must be a number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_discretize_cart(outcome = vars("mpg"), + min_n = -4) %>% prep() + Condition + Error in `step_discretize_cart()`: + Caused by error in `prep()`: + ! `min_n` must be a number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_discretize_cart(outcome = vars("mpg"), + tree_depth = -4) %>% prep() + Condition + Error in `step_discretize_cart()`: + Caused by error in `prep()`: + ! `tree_depth` must be a number larger than or equal to 0, not the number -4. + # tidy method Code diff --git a/tests/testthat/_snaps/discretize_xgb.md b/tests/testthat/_snaps/discretize_xgb.md index fc3bd5c..5636842 100644 --- a/tests/testthat/_snaps/discretize_xgb.md +++ b/tests/testthat/_snaps/discretize_xgb.md @@ -296,6 +296,56 @@ -- Operations * Discretizing variables using xgboost: x and z | Trained, weighted +# bad args + + Code + recipe(~., data = mtcars) %>% step_discretize_xgb(outcome = "class", + sample_val = -4) %>% prep() + Condition + Error in `step_discretize_xgb()`: + Caused by error in `prep()`: + ! `sample_val` must be a number between 0 and 1, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_discretize_xgb(outcome = "class", + learn_rate = -4) %>% prep() + Condition + Error in `step_discretize_xgb()`: + Caused by error in `prep()`: + ! `learn_rate` must be a number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_discretize_xgb(outcome = "class", + num_breaks = -4) %>% prep() + Condition + Error in `step_discretize_xgb()`: + Caused by error in `prep()`: + ! `num_breaks` must be a whole number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_discretize_xgb(outcome = "class", + tree_depth = -4) %>% prep() + Condition + Error in `step_discretize_xgb()`: + Caused by error in `prep()`: + ! `tree_depth` must be a whole number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_discretize_xgb(outcome = "class", min_n = -4) %>% + prep() + Condition + Error in `step_discretize_xgb()`: + Caused by error in `prep()`: + ! `min_n` must be a whole number larger than or equal to 0, not the number -4. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/_snaps/embed.md b/tests/testthat/_snaps/embed.md index 46778ab..1d78694 100644 --- a/tests/testthat/_snaps/embed.md +++ b/tests/testthat/_snaps/embed.md @@ -29,6 +29,26 @@ x All columns selected for the step should be string, factor, or ordered. * 1 double variable found: `Sepal.Length` +--- + + Code + recipe(~., data = mtcars) %>% step_embed(outcome = vars(mpg), num_terms = -4) %>% + prep() + Condition + Error in `step_embed()`: + Caused by error in `prep()`: + ! `num_terms` must be a whole number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_embed(outcome = vars(mpg), hidden_units = -4) %>% + prep() + Condition + Error in `step_embed()`: + Caused by error in `prep()`: + ! `hidden_units` must be a whole number larger than or equal to 0, not the number -4. + # check_name() is used Code diff --git a/tests/testthat/_snaps/feature_hash.md b/tests/testthat/_snaps/feature_hash.md index 2862e6f..d8f913d 100644 --- a/tests/testthat/_snaps/feature_hash.md +++ b/tests/testthat/_snaps/feature_hash.md @@ -18,6 +18,18 @@ ! Name collision occurred. The following variable names already exist: * `x3_hash_01` +# bad args + + Code + recipe(~., data = mtcars) %>% step_feature_hash(num_hash = -4) %>% prep() + Condition + Warning: + `step_feature_hash()` was deprecated in embed 0.2.0. + i Please use `textrecipes::step_dummy_hash()` instead. + Error in `step_feature_hash()`: + Caused by error in `prep()`: + ! `num_hash` must be a whole number larger than or equal to 0, not the number -4. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/_snaps/lencode_bayes.md b/tests/testthat/_snaps/lencode_bayes.md index e909b32..766dd07 100644 --- a/tests/testthat/_snaps/lencode_bayes.md +++ b/tests/testthat/_snaps/lencode_bayes.md @@ -101,6 +101,14 @@ -- Operations * Linear embedding for factors via Bayesian GLM for: x3 | Trained, weighted +# bad args + + Code + recipe(~., data = mtcars) %>% step_lencode_bayes(outcome = vars(mpg), verbose = "yes") + Condition + Error in `step_lencode_bayes()`: + ! `verbose` must be `TRUE` or `FALSE`, not the string "yes". + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/_snaps/pca_sparse.md b/tests/testthat/_snaps/pca_sparse.md index 360049e..2f4c250 100644 --- a/tests/testthat/_snaps/pca_sparse.md +++ b/tests/testthat/_snaps/pca_sparse.md @@ -26,6 +26,32 @@ ! Name collision occurred. The following variable names already exist: * `PC1` +# bad args + + Code + recipe(~., data = mtcars) %>% step_pca_sparse(num_comp = -4) %>% prep() + Condition + Error in `step_pca_sparse()`: + Caused by error in `prep()`: + ! `num_comp` must be a whole number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_pca_sparse(predictor_prop = -4) %>% prep() + Condition + Error in `step_pca_sparse()`: + Caused by error in `prep()`: + ! `predictor_prop` must be a number between 0 and 1, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_pca_sparse(prefix = NULL) + Condition + Error in `step_pca_sparse()`: + ! `prefix` must be a single string, not `NULL`. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/_snaps/pca_sparse_bayes.md b/tests/testthat/_snaps/pca_sparse_bayes.md index 26c878b..da9ec14 100644 --- a/tests/testthat/_snaps/pca_sparse_bayes.md +++ b/tests/testthat/_snaps/pca_sparse_bayes.md @@ -26,6 +26,43 @@ ! Name collision occurred. The following variable names already exist: * `PC1` +# bad args + + Code + recipe(~., data = mtcars) %>% step_pca_sparse_bayes(num_comp = -4) %>% prep() + Condition + Error in `step_pca_sparse_bayes()`: + Caused by error in `prep()`: + ! `num_comp` must be a whole number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_pca_sparse_bayes(prior_slab_dispersion = -4) %>% + prep() + Condition + Error in `step_pca_sparse_bayes()`: + Caused by error in `prep()`: + ! `prior_slab_dispersion` must be a number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_pca_sparse_bayes(prior_mixture_threshold = - + 4) %>% prep() + Condition + Error in `step_pca_sparse_bayes()`: + Caused by error in `prep()`: + ! `prior_mixture_threshold` must be a number between 0 and 1, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_pca_sparse_bayes(prefix = NULL) + Condition + Error in `step_pca_sparse_bayes()`: + ! `prefix` must be a single string, not `NULL`. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/_snaps/pca_truncated.md b/tests/testthat/_snaps/pca_truncated.md index 1168e1b..3939e60 100644 --- a/tests/testthat/_snaps/pca_truncated.md +++ b/tests/testthat/_snaps/pca_truncated.md @@ -8,6 +8,23 @@ ! Name collision occurred. The following variable names already exist: * `PC1` +# bad args + + Code + recipe(~., data = mtcars) %>% step_pca_truncated(num_comp = -4) %>% prep() + Condition + Error in `step_pca_truncated()`: + Caused by error in `prep()`: + ! `num_comp` must be a whole number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_pca_truncated(prefix = NULL) + Condition + Error in `step_pca_truncated()`: + ! `prefix` must be a single string, not `NULL`. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/_snaps/umap.md b/tests/testthat/_snaps/umap.md index 6a325f7..60ced23 100644 --- a/tests/testthat/_snaps/umap.md +++ b/tests/testthat/_snaps/umap.md @@ -8,80 +8,109 @@ ! Name collision occurred. The following variable names already exist: * `UMAP1` -# empty printing +# bad args Code - rec - Message - - -- Recipe ---------------------------------------------------------------------- - - -- Inputs - Number of variables by role - outcome: 1 - predictor: 10 - - -- Operations - * UMAP embedding for: + recipe(~., data = mtcars) %>% step_umap(num_comp = -4) %>% prep() + Condition + Error in `step_umap()`: + Caused by error in `prep()`: + ! `num_comp` must be a whole number larger than or equal to 0, not the number -4. --- Code - rec - Message - - -- Recipe ---------------------------------------------------------------------- - - -- Inputs - Number of variables by role - outcome: 1 - predictor: 10 - - -- Training information - Training data contained 32 data points and no incomplete rows. - - -- Operations - * UMAP embedding for: | Trained + recipe(~., data = mtcars) %>% step_umap(neighbors = -4) %>% prep() + Condition + Error in `step_umap()`: + Caused by error in `prep()`: + ! `neighbors` must be a whole number larger than or equal to 0, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_umap(min_dist = TRUE) %>% prep() + Condition + Error in `step_umap()`: + Caused by error in `prep()`: + ! `min_dist` must be a number, not `TRUE`. + +--- + + Code + recipe(~., data = mtcars) %>% step_umap(learn_rate = -4) %>% prep() + Condition + Error in `step_umap()`: + Caused by error in `prep()`: + ! `learn_rate` must be a number larger than or equal to 0, not the number -4. -# keep_original_cols - can prep recipes with it missing +--- Code - rec <- prep(rec) + recipe(~., data = mtcars) %>% step_umap(epochs = -4) %>% prep() Condition - Warning: - `keep_original_cols` was added to `step_pca()` after this recipe was created. - i Regenerate your recipe to avoid this warning. + Error in `step_umap()`: + Caused by error in `prep()`: + ! `epochs` must be a whole number larger than or equal to 0 or `NULL`, not the number -4. -# printing +--- Code - print(rec) + recipe(~., data = mtcars) %>% step_umap(initial = "wrong") %>% prep() + Condition + Error in `step_umap()`: + Caused by error in `prep()`: + ! `initial` must be one of "spectral", "normlaplacian", "random", "lvrandom", "laplacian", "pca", "spca", or "agspectral", not "wrong". + +--- + + Code + recipe(~., data = mtcars) %>% step_umap(target_weight = -4) %>% prep() + Condition + Error in `step_umap()`: + Caused by error in `prep()`: + ! `target_weight` must be a number between 0 and 1, not the number -4. + +--- + + Code + recipe(~., data = mtcars) %>% step_umap(prefix = NULL) + Condition + Error in `step_umap()`: + ! `prefix` must be a single string, not `NULL`. + +# empty printing + + Code + rec Message -- Recipe ---------------------------------------------------------------------- -- Inputs Number of variables by role - predictor: 4 + outcome: 1 + predictor: 10 -- Operations - * UMAP embedding for: all_predictors() + * UMAP embedding for: --- Code - prep(rec) + rec Message -- Recipe ---------------------------------------------------------------------- -- Inputs Number of variables by role - predictor: 4 + outcome: 1 + predictor: 10 -- Training information - Training data contained 133 data points and no incomplete rows. + Training data contained 32 data points and no incomplete rows. -- Operations - * UMAP embedding for: Sepal.Length, Sepal.Width, Petal.Length, ... | Trained + * UMAP embedding for: | Trained diff --git a/tests/testthat/_snaps/woe.md b/tests/testthat/_snaps/woe.md index c607a81..bbefe50 100644 --- a/tests/testthat/_snaps/woe.md +++ b/tests/testthat/_snaps/woe.md @@ -109,6 +109,24 @@ Caused by error in `dictionary()`: ! `outcome` must have exactly 2 categories (has 3). +# bad args + + Code + recipe(~., data = mtcars) %>% step_woe(outcome = vars(mpg), Laplace = NULL) %>% + prep() + Condition + Error in `step_woe()`: + Caused by error in `prep()`: + ! `Laplace` must be a number, not `NULL`. + +--- + + Code + recipe(~., data = mtcars) %>% step_woe(outcome = vars(mpg), prefix = NULL) + Condition + Error in `step_woe()`: + ! `prefix` must be a single string, not `NULL`. + # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/test-collapse_cart.R b/tests/testthat/test-collapse_cart.R index f187b72..ad494c0 100644 --- a/tests/testthat/test-collapse_cart.R +++ b/tests/testthat/test-collapse_cart.R @@ -101,6 +101,19 @@ test_that("failed collapsing", { expect_true(length(rec_5$steps[[1]]$results) == 0) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_collapse_cart(cost_complexity = -4) + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_collapse_cart(min_n = -4) + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-collapse_stringdist.R b/tests/testthat/test-collapse_stringdist.R index 1648d6c..7f484dd 100644 --- a/tests/testthat/test-collapse_stringdist.R +++ b/tests/testthat/test-collapse_stringdist.R @@ -193,6 +193,21 @@ test_that("failed collapsing", { ) }) +test_that("bad args", { + skip_if_not_installed("stringdist") + + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_collapse_stringdist(cost_complexity = -4) + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_collapse_stringdist(min_n = -4) + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-discretize_cart.R b/tests/testthat/test-discretize_cart.R index d4ef367..794960a 100644 --- a/tests/testthat/test-discretize_cart.R +++ b/tests/testthat/test-discretize_cart.R @@ -206,6 +206,28 @@ test_that("tunable", { ) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_discretize_cart(outcome = vars("mpg"), cost_complexity = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_discretize_cart(outcome = vars("mpg"), min_n = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_discretize_cart(outcome = vars("mpg"), tree_depth = -4) %>% + prep() + ) +}) + + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-discretize_xgb.R b/tests/testthat/test-discretize_xgb.R index ee94576..1dc3686 100644 --- a/tests/testthat/test-discretize_xgb.R +++ b/tests/testthat/test-discretize_xgb.R @@ -629,6 +629,41 @@ test_that("tunable", { ) }) + +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_discretize_xgb(outcome = "class", sample_val = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_discretize_xgb(outcome = "class", learn_rate = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_discretize_xgb(outcome = "class", num_breaks = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_discretize_xgb(outcome = "class", tree_depth = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_discretize_xgb(outcome = "class", min_n = -4) %>% + prep() + ) + +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-embed.R b/tests/testthat/test-embed.R index b6a27d9..243b482 100644 --- a/tests/testthat/test-embed.R +++ b/tests/testthat/test-embed.R @@ -332,6 +332,21 @@ test_that("tunable", { ) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_embed(outcome = vars(mpg), num_terms = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_embed(outcome = vars(mpg), hidden_units = -4) %>% + prep() + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-feature_hash.R b/tests/testthat/test-feature_hash.R index d95eafc..d99840d 100644 --- a/tests/testthat/test-feature_hash.R +++ b/tests/testthat/test-feature_hash.R @@ -123,6 +123,15 @@ test_that("check_name() is used", { ) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_feature_hash(num_hash = -4) %>% + prep() + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-lencode_bayes.R b/tests/testthat/test-lencode_bayes.R index 5e7c581..b06452b 100644 --- a/tests/testthat/test-lencode_bayes.R +++ b/tests/testthat/test-lencode_bayes.R @@ -412,6 +412,14 @@ test_that("case weights", { expect_snapshot(class_test) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_lencode_bayes(outcome = vars(mpg), verbose = "yes") + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-pca_sparse.R b/tests/testthat/test-pca_sparse.R index 775424d..1c10600 100644 --- a/tests/testthat/test-pca_sparse.R +++ b/tests/testthat/test-pca_sparse.R @@ -106,6 +106,26 @@ test_that("Do nothing for num_comps = 0 and keep_original_cols = FALSE", { expect_identical(res, tibble::as_tibble(mtcars)) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_sparse(num_comp = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_sparse(predictor_prop = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_sparse(prefix = NULL) + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-pca_sparse_bayes.R b/tests/testthat/test-pca_sparse_bayes.R index 842961a..f17a778 100644 --- a/tests/testthat/test-pca_sparse_bayes.R +++ b/tests/testthat/test-pca_sparse_bayes.R @@ -115,6 +115,32 @@ test_that("Do nothing for num_comps = 0 and keep_original_cols = FALSE", { expect_identical(res, tibble::as_tibble(mtcars)) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_sparse_bayes(num_comp = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_sparse_bayes(prior_slab_dispersion = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_sparse_bayes(prior_mixture_threshold = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_sparse_bayes(prefix = NULL) + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-pca_truncated.R b/tests/testthat/test-pca_truncated.R index c47de41..224f7b9 100644 --- a/tests/testthat/test-pca_truncated.R +++ b/tests/testthat/test-pca_truncated.R @@ -84,6 +84,20 @@ test_that("Do nothing for num_comps = 0 and keep_original_cols = FALSE", { expect_identical(res, tibble::as_tibble(mtcars)) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_truncated(num_comp = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_pca_truncated(prefix = NULL) + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { diff --git a/tests/testthat/test-umap.R b/tests/testthat/test-umap.R index 0428a23..eec6b56 100644 --- a/tests/testthat/test-umap.R +++ b/tests/testthat/test-umap.R @@ -244,6 +244,55 @@ test_that("backwards compatible for initial and target_weight args (#213)", { ) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_umap(num_comp = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_umap(neighbors = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_umap(min_dist = TRUE) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_umap(learn_rate = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_umap(epochs = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_umap(initial = "wrong") %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_umap(target_weight = -4) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_umap(prefix = NULL) + ) +}) # Infrastructure --------------------------------------------------------------- diff --git a/tests/testthat/test-woe.R b/tests/testthat/test-woe.R index eca8cd1..d5735f0 100644 --- a/tests/testthat/test-woe.R +++ b/tests/testthat/test-woe.R @@ -266,6 +266,20 @@ test_that("tunable", { ) }) +test_that("bad args", { + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_woe(outcome = vars(mpg), Laplace = NULL) %>% + prep() + ) + expect_snapshot( + error = TRUE, + recipe(~., data = mtcars) %>% + step_woe(outcome = vars(mpg), prefix = NULL) + ) +}) + # Infrastructure --------------------------------------------------------------- test_that("bake method errors when needed non-standard role columns are missing", { From e361ae8554d273646c64e3079256e34f742182d6 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 8 Nov 2024 10:03:22 -0800 Subject: [PATCH 3/3] delete duplicate test --- tests/testthat/_snaps/embed.md | 23 ++++++----------------- tests/testthat/test-embed.R | 17 ----------------- 2 files changed, 6 insertions(+), 34 deletions(-) diff --git a/tests/testthat/_snaps/embed.md b/tests/testthat/_snaps/embed.md index 2324a20..8ea80cb 100644 --- a/tests/testthat/_snaps/embed.md +++ b/tests/testthat/_snaps/embed.md @@ -18,18 +18,17 @@ * `x3` i This may cause errors when processing new data. -# bad args +# check_name() is used Code - recipe(Species ~ ., data = three_class) %>% step_embed(Sepal.Length, outcome = vars( - Species)) %>% prep(training = three_class, retain = TRUE) + prep(rec, training = dat) Condition Error in `step_embed()`: - Caused by error in `prep()`: - x All columns selected for the step should be string, factor, or ordered. - * 1 double variable found: `Sepal.Length` + Caused by error in `bake()`: + ! Name collision occurred. The following variable names already exist: + * `x3_embed_1` ---- +# bad args Code recipe(~., data = mtcars) %>% step_embed(outcome = vars(mpg), num_terms = -4) %>% @@ -49,16 +48,6 @@ Caused by error in `prep()`: ! `hidden_units` must be a whole number larger than or equal to 0, not the number -4. -# check_name() is used - - Code - prep(rec, training = dat) - Condition - Error in `step_embed()`: - Caused by error in `bake()`: - ! Name collision occurred. The following variable names already exist: - * `x3_embed_1` - # bake method errors when needed non-standard role columns are missing Code diff --git a/tests/testthat/test-embed.R b/tests/testthat/test-embed.R index 243b482..b60cdc1 100644 --- a/tests/testthat/test-embed.R +++ b/tests/testthat/test-embed.R @@ -283,23 +283,6 @@ test_that("character encoded predictor", { ) }) -test_that("bad args", { - skip_on_cran() - skip_if_not_installed("keras") - skip_if(!embed:::is_tf_available()) - - three_class <- iris - three_class$fac <- rep(letters[1:3], 50) - three_class$logical <- rep(c(TRUE, FALSE), 75) - - expect_snapshot( - error = TRUE, - recipe(Species ~ ., data = three_class) %>% - step_embed(Sepal.Length, outcome = vars(Species)) %>% - prep(training = three_class, retain = TRUE) - ) -}) - test_that("check_name() is used", { skip_on_cran() skip_if_not_installed("keras")