From 7f29fa629a46d2f66e4d3f519737eb8e1d438a4f Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 24 Sep 2024 15:28:21 -0700 Subject: [PATCH 1/3] no more classes error checking --- DESCRIPTION | 4 +- tests/testthat/_snaps/collapse_cart.md | 8 + tests/testthat/_snaps/collapse_stringdist.md | 8 + tests/testthat/_snaps/discretize_cart.md | 8 + tests/testthat/_snaps/discretize_xgb.md | 146 ++++++++++--------- tests/testthat/_snaps/embed.md | 8 + tests/testthat/_snaps/feature_hash.md | 8 + tests/testthat/_snaps/lencode_bayes.md | 8 + tests/testthat/_snaps/lencode_glm.md | 8 + tests/testthat/_snaps/lencode_mixed.md | 8 + tests/testthat/_snaps/pca_sparse.md | 8 + tests/testthat/_snaps/pca_sparse_bayes.md | 8 + tests/testthat/_snaps/pca_truncated.md | 8 + tests/testthat/_snaps/woe.md | 8 + tests/testthat/test-collapse_cart.R | 8 +- tests/testthat/test-collapse_stringdist.R | 8 +- tests/testthat/test-discretize_cart.R | 8 +- tests/testthat/test-discretize_xgb.R | 12 +- tests/testthat/test-embed.R | 6 +- tests/testthat/test-feature_hash.R | 8 +- tests/testthat/test-lencode_bayes.R | 6 +- tests/testthat/test-lencode_glm.R | 8 +- tests/testthat/test-lencode_mixed.R | 8 +- tests/testthat/test-pca_sparse.R | 6 +- tests/testthat/test-pca_sparse_bayes.R | 6 +- tests/testthat/test-pca_truncated.R | 8 +- tests/testthat/test-umap.R | 6 +- tests/testthat/test-woe.R | 6 +- 28 files changed, 228 insertions(+), 122 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8d0cdf63..e1ccc3ac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,7 @@ URL: https://embed.tidymodels.org, https://github.com/tidymodels/embed BugReports: https://github.com/tidymodels/embed/issues Depends: R (>= 3.6), - recipes (>= 1.0.7) + recipes (>= 1.1.0.9000) Imports: glue, dplyr (>= 1.1.0), @@ -52,6 +52,8 @@ Suggests: testthat (>= 3.0.0), VBsparsePCA, xgboost +Remotes: + tidymodels/recipes ByteCompile: true Config/Needs/website: tidymodels, ggiraph, tidyverse/tidytemplate, reticulate 
Config/testthat/edition: 3 diff --git a/tests/testthat/_snaps/collapse_cart.md b/tests/testthat/_snaps/collapse_cart.md index abdc3d6e..81f830ea 100644 --- a/tests/testthat/_snaps/collapse_cart.md +++ b/tests/testthat/_snaps/collapse_cart.md @@ -1,3 +1,11 @@ +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = ames[, -1]) + Condition + Error in `step_collapse_cart()`: + ! The following required column is missing from `new_data`: MS_SubClass. + # empty printing Code diff --git a/tests/testthat/_snaps/collapse_stringdist.md b/tests/testthat/_snaps/collapse_stringdist.md index 2c13d3a8..fbddb1f4 100644 --- a/tests/testthat/_snaps/collapse_stringdist.md +++ b/tests/testthat/_snaps/collapse_stringdist.md @@ -1,3 +1,11 @@ +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = ames[, -1]) + Condition + Error in `step_collapse_stringdist()`: + ! The following required column is missing from `new_data`: MS_SubClass. + # empty printing Code diff --git a/tests/testthat/_snaps/discretize_cart.md b/tests/testthat/_snaps/discretize_cart.md index c342dae5..b643b9cd 100644 --- a/tests/testthat/_snaps/discretize_cart.md +++ b/tests/testthat/_snaps/discretize_cart.md @@ -88,6 +88,14 @@ -- Operations * Discretizing variables using CART: x and z | Trained, weighted +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = sim_tr_cls[, -1]) + Condition + Error in `step_discretize_cart()`: + ! The following required column is missing from `new_data`: x. 
+ # empty printing Code diff --git a/tests/testthat/_snaps/discretize_xgb.md b/tests/testthat/_snaps/discretize_xgb.md index 5d9ca324..8aa8444c 100644 --- a/tests/testthat/_snaps/discretize_xgb.md +++ b/tests/testthat/_snaps/discretize_xgb.md @@ -4,35 +4,35 @@ xgboost Output ##### xgb.Booster - raw: 74.2 Kb + raw: 74.2 Kb call: - xgboost::xgb.train(params = .params, data = .train, nrounds = 100, - watchlist = list(train = .train, test = .test), verbose = 0, - early_stopping_rounds = 10, tree_method = "hist", objective = .objective, - nthread = 1) + xgboost::xgb.train(params = .params, data = .train, nrounds = 100, + watchlist = list(train = .train, test = .test), verbose = 0, + early_stopping_rounds = 10, tree_method = "hist", objective = .objective, + nthread = 1) params (as set within xgb.train): - eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "binary:logistic", nthread = "1", validate_parameters = "TRUE" + eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "binary:logistic", nthread = "1", validate_parameters = "TRUE" xgb.attributes: - best_iteration, best_msg, best_ntreelimit, best_score, niter + best_iteration, best_msg, best_ntreelimit, best_score, niter callbacks: - cb.evaluation.log() - cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, - verbose = verbose) - # of features: 13 + cb.evaluation.log() + cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, + verbose = verbose) + # of features: 13 niter: 96 - best_iteration : 86 - best_ntreelimit : 86 - best_score : 0.4421503 - best_msg : [86] train-logloss:0.417583 test-logloss:0.442150 - nfeatures : 13 + best_iteration : 86 + best_ntreelimit : 86 + best_score : 0.4421503 + best_msg : [86] train-logloss:0.417583 test-logloss:0.442150 + nfeatures : 13 evaluation_log: - iter train_logloss test_logloss - - 1 0.6279229 0.6303495 - 2 0.5869984 0.5894989 - --- 
- 95 0.4157892 0.4425857 - 96 0.4156102 0.4432699 + iter train_logloss test_logloss + + 1 0.6279229 0.6303495 + 2 0.5869984 0.5894989 + --- --- --- + 95 0.4157892 0.4425857 + 96 0.4156102 0.4432699 # run_xgboost for multi-classification @@ -40,35 +40,35 @@ xgboost Output ##### xgb.Booster - raw: 149.7 Kb + raw: 149.7 Kb call: - xgboost::xgb.train(params = .params, data = .train, nrounds = 100, - watchlist = list(train = .train, test = .test), verbose = 0, - early_stopping_rounds = 10, tree_method = "hist", objective = .objective, - nthread = 1) + xgboost::xgb.train(params = .params, data = .train, nrounds = 100, + watchlist = list(train = .train, test = .test), verbose = 0, + early_stopping_rounds = 10, tree_method = "hist", objective = .objective, + nthread = 1) params (as set within xgb.train): - eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", num_class = "6", tree_method = "hist", objective = "multi:softprob", nthread = "1", validate_parameters = "TRUE" + eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", num_class = "6", tree_method = "hist", objective = "multi:softprob", nthread = "1", validate_parameters = "TRUE" xgb.attributes: - best_iteration, best_msg, best_ntreelimit, best_score, niter + best_iteration, best_msg, best_ntreelimit, best_score, niter callbacks: - cb.evaluation.log() - cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, - verbose = verbose) - # of features: 30 + cb.evaluation.log() + cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, + verbose = verbose) + # of features: 30 niter: 33 - best_iteration : 23 - best_ntreelimit : 23 - best_score : 1.246428 - best_msg : [23] train-mlogloss:1.178121 test-mlogloss:1.246428 - nfeatures : 30 + best_iteration : 23 + best_ntreelimit : 23 + best_score : 1.246428 + best_msg : [23] train-mlogloss:1.178121 test-mlogloss:1.246428 + nfeatures : 30 evaluation_log: - iter train_mlogloss test_mlogloss - - 1 1.623174 
1.631783 - 2 1.515108 1.531188 - --- - 32 1.159813 1.249701 - 33 1.158088 1.250462 + iter train_mlogloss test_mlogloss + + 1 1.623174 1.631783 + 2 1.515108 1.531188 + --- --- --- + 32 1.159813 1.249701 + 33 1.158088 1.250462 # run_xgboost for regression @@ -76,35 +76,35 @@ xgboost Output ##### xgb.Booster - raw: 40.2 Kb + raw: 40.2 Kb call: - xgboost::xgb.train(params = .params, data = .train, nrounds = 100, - watchlist = list(train = .train, test = .test), verbose = 0, - early_stopping_rounds = 10, tree_method = "hist", objective = .objective, - nthread = 1) + xgboost::xgb.train(params = .params, data = .train, nrounds = 100, + watchlist = list(train = .train, test = .test), verbose = 0, + early_stopping_rounds = 10, tree_method = "hist", objective = .objective, + nthread = 1) params (as set within xgb.train): - eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE" + eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE" xgb.attributes: - best_iteration, best_msg, best_ntreelimit, best_score, niter + best_iteration, best_msg, best_ntreelimit, best_score, niter callbacks: - cb.evaluation.log() - cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, - verbose = verbose) - # of features: 73 + cb.evaluation.log() + cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, + verbose = verbose) + # of features: 73 niter: 50 - best_iteration : 40 - best_ntreelimit : 40 - best_score : 0.1165337 - best_msg : [40] train-rmse:0.064010 test-rmse:0.116534 - nfeatures : 73 + best_iteration : 40 + best_ntreelimit : 40 + best_score : 0.1165337 + best_msg : [40] train-rmse:0.064010 test-rmse:0.116534 + nfeatures : 73 evaluation_log: - iter train_rmse test_rmse - - 1 3.31007782 3.3068878 - 2 2.31969213 2.3262197 - --- - 
49 0.06207940 0.1175223 - 50 0.06191289 0.1188113 + iter train_rmse test_rmse + + 1 3.31007782 3.3068878 + 2 2.31969213 2.3262197 + --- --- --- + 49 0.06207940 0.1175223 + 50 0.06191289 0.1188113 # xgb_binning for classification @@ -292,6 +292,14 @@ -- Operations * Discretizing variables using xgboost: x and z | Trained, weighted +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = sim_tr_cls[, -1]) + Condition + Error in `step_discretize_xgb()`: + ! The following required column is missing from `new_data`: x. + # empty printing Code diff --git a/tests/testthat/_snaps/embed.md b/tests/testthat/_snaps/embed.md index 1bf18646..46778ab1 100644 --- a/tests/testthat/_snaps/embed.md +++ b/tests/testthat/_snaps/embed.md @@ -39,6 +39,14 @@ ! Name collision occurred. The following variable names already exist: * `x3_embed_1` +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = ex_dat[, -3]) + Condition + Error in `step_embed()`: + ! The following required column is missing from `new_data`: x3. + # empty printing Code diff --git a/tests/testthat/_snaps/feature_hash.md b/tests/testthat/_snaps/feature_hash.md index 54a69451..2862e6fb 100644 --- a/tests/testthat/_snaps/feature_hash.md +++ b/tests/testthat/_snaps/feature_hash.md @@ -18,6 +18,14 @@ ! Name collision occurred. The following variable names already exist: * `x3_hash_01` +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = ex_dat[, -3]) + Condition + Error in `step_feature_hash()`: + ! The following required column is missing from `new_data`: x3. 
+ # empty printing Code diff --git a/tests/testthat/_snaps/lencode_bayes.md b/tests/testthat/_snaps/lencode_bayes.md index 96664736..e909b323 100644 --- a/tests/testthat/_snaps/lencode_bayes.md +++ b/tests/testthat/_snaps/lencode_bayes.md @@ -101,6 +101,14 @@ -- Operations * Linear embedding for factors via Bayesian GLM for: x3 | Trained, weighted +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = ex_dat[, -3]) + Condition + Error in `step_lencode_bayes()`: + ! The following required column is missing from `new_data`: x3. + # empty printing Code diff --git a/tests/testthat/_snaps/lencode_glm.md b/tests/testthat/_snaps/lencode_glm.md index 2e54662a..9105a6da 100644 --- a/tests/testthat/_snaps/lencode_glm.md +++ b/tests/testthat/_snaps/lencode_glm.md @@ -54,6 +54,14 @@ -- Operations * Linear embedding for factors via GLM for: x3 | Trained, weighted +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = ex_dat[, -3]) + Condition + Error in `step_lencode_glm()`: + ! The following required column is missing from `new_data`: x3. + # empty printing Code diff --git a/tests/testthat/_snaps/lencode_mixed.md b/tests/testthat/_snaps/lencode_mixed.md index c5333520..58dded21 100644 --- a/tests/testthat/_snaps/lencode_mixed.md +++ b/tests/testthat/_snaps/lencode_mixed.md @@ -49,6 +49,14 @@ -- Operations * Linear embedding for factors via mixed effects for: x3 | Trained, weighted +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = ex_dat[, -3]) + Condition + Error in `step_lencode_mixed()`: + ! The following required column is missing from `new_data`: x3. + # empty printing Code diff --git a/tests/testthat/_snaps/pca_sparse.md b/tests/testthat/_snaps/pca_sparse.md index 3fab7c00..360049ed 100644 --- a/tests/testthat/_snaps/pca_sparse.md +++ b/tests/testthat/_snaps/pca_sparse.md @@ -26,6 +26,14 @@ ! 
Name collision occurred. The following variable names already exist: * `PC1` +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = tr[, -3]) + Condition + Error in `step_pca_sparse()`: + ! The following required column is missing from `new_data`: avg_inten_ch_1. + # empty printing Code diff --git a/tests/testthat/_snaps/pca_sparse_bayes.md b/tests/testthat/_snaps/pca_sparse_bayes.md index 6f5bbf07..26c878b4 100644 --- a/tests/testthat/_snaps/pca_sparse_bayes.md +++ b/tests/testthat/_snaps/pca_sparse_bayes.md @@ -26,6 +26,14 @@ ! Name collision occurred. The following variable names already exist: * `PC1` +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = tr[, -3]) + Condition + Error in `step_pca_sparse_bayes()`: + ! The following required column is missing from `new_data`: avg_inten_ch_1. + # empty printing Code diff --git a/tests/testthat/_snaps/pca_truncated.md b/tests/testthat/_snaps/pca_truncated.md index 0724e71a..1168e1b4 100644 --- a/tests/testthat/_snaps/pca_truncated.md +++ b/tests/testthat/_snaps/pca_truncated.md @@ -8,6 +8,14 @@ ! Name collision occurred. The following variable names already exist: * `PC1` +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = tr[, -3]) + Condition + Error in `step_pca_truncated()`: + ! The following required column is missing from `new_data`: avg_inten_ch_1. + # empty printing Code diff --git a/tests/testthat/_snaps/woe.md b/tests/testthat/_snaps/woe.md index 2856081b..2a52af88 100644 --- a/tests/testthat/_snaps/woe.md +++ b/tests/testthat/_snaps/woe.md @@ -109,6 +109,14 @@ Caused by error in `dictionary()`: ! 'outcome' must have exactly 2 categories (has 3) +# bake method errors when needed non-standard role columns are missing + + Code + bake(rec_trained, new_data = credit_data[, -8]) + Condition + Error in `step_woe()`: + ! 
The following required column is missing from `new_data`: Job. + # empty printing Code diff --git a/tests/testthat/test-collapse_cart.R b/tests/testthat/test-collapse_cart.R index 19a56c44..4b7f1990 100644 --- a/tests/testthat/test-collapse_cart.R +++ b/tests/testthat/test-collapse_cart.R @@ -119,9 +119,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = ames, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = ames[, -1]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = ames[, -1]) ) }) @@ -171,4 +171,4 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-collapse_stringdist.R b/tests/testthat/test-collapse_stringdist.R index 464a3c50..66bb0d0d 100644 --- a/tests/testthat/test-collapse_stringdist.R +++ b/tests/testthat/test-collapse_stringdist.R @@ -212,9 +212,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = ames, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = ames[, -1]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = ames[, -1]) ) }) @@ -266,4 +266,4 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-discretize_cart.R b/tests/testthat/test-discretize_cart.R index 67e91d4c..ae17b113 100644 --- a/tests/testthat/test-discretize_cart.R +++ b/tests/testthat/test-discretize_cart.R @@ -222,9 +222,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE) ) - expect_error( - bake(rec_trained, new_data = sim_tr_cls[, -1]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + 
bake(rec_trained, new_data = sim_tr_cls[, -1]) ) }) @@ -288,4 +288,4 @@ test_that("tunable is setup to works with extract_parameter_set_dials", { expect_s3_class(params, "parameters") expect_identical(nrow(params), 3L) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-discretize_xgb.R b/tests/testthat/test-discretize_xgb.R index 865df140..ee94576a 100644 --- a/tests/testthat/test-discretize_xgb.R +++ b/tests/testthat/test-discretize_xgb.R @@ -121,7 +121,7 @@ test_that("run_xgboost for classification", { .num_class = NA ) - expect_snapshot(xgboost) + expect_snapshot(xgboost, transform = trimws) expect_equal(length(xgboost$params), 8) expect_equal(xgboost$nfeatures, 13) expect_equal(xgboost$params$tree_method, "hist") @@ -142,7 +142,7 @@ test_that("run_xgboost for multi-classification", { .objective = "multi:softprob" ) - expect_snapshot(xgboost) + expect_snapshot(xgboost, transform = trimws) expect_equal(length(xgboost$params), 9) expect_equal(xgboost$nfeatures, 30) expect_equal(xgboost$params$tree_method, "hist") @@ -163,7 +163,7 @@ test_that("run_xgboost for regression", { .num_class = NA ) - expect_snapshot(xgboost) + expect_snapshot(xgboost, transform = trimws) expect_true(length(xgboost$params) > 1) expect_true(xgboost$nfeatures > 1) expect_equal(xgboost$params$tree_method, "hist") @@ -639,9 +639,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = sim_tr_cls[, -1]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = sim_tr_cls[, -1]) ) }) diff --git a/tests/testthat/test-embed.R b/tests/testthat/test-embed.R index 369c085d..440af7f3 100644 --- a/tests/testthat/test-embed.R +++ b/tests/testthat/test-embed.R @@ -350,9 +350,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = ex_dat, 
verbose = FALSE) - expect_error( - bake(rec_trained, new_data = ex_dat[, -3]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = ex_dat[, -3]) ) }) diff --git a/tests/testthat/test-feature_hash.R b/tests/testthat/test-feature_hash.R index d45992ca..e128a8e6 100644 --- a/tests/testthat/test-feature_hash.R +++ b/tests/testthat/test-feature_hash.R @@ -137,9 +137,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = ex_dat, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = ex_dat[, -3]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = ex_dat[, -3]) ) }) @@ -257,4 +257,4 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-lencode_bayes.R b/tests/testthat/test-lencode_bayes.R index d777a3fe..5e7c5812 100644 --- a/tests/testthat/test-lencode_bayes.R +++ b/tests/testthat/test-lencode_bayes.R @@ -425,9 +425,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = ex_dat, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = ex_dat[, -3]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = ex_dat[, -3]) ) }) diff --git a/tests/testthat/test-lencode_glm.R b/tests/testthat/test-lencode_glm.R index c6b8d4a9..5115ed4c 100644 --- a/tests/testthat/test-lencode_glm.R +++ b/tests/testthat/test-lencode_glm.R @@ -276,9 +276,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = ex_dat, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = ex_dat[, -3]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = ex_dat[, -3]) ) }) @@ -330,4 +330,4 @@ 
test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-lencode_mixed.R b/tests/testthat/test-lencode_mixed.R index 5aa63a56..3cffa109 100644 --- a/tests/testthat/test-lencode_mixed.R +++ b/tests/testthat/test-lencode_mixed.R @@ -301,9 +301,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = ex_dat, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = ex_dat[, -3]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = ex_dat[, -3]) ) }) @@ -358,4 +358,4 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-pca_sparse.R b/tests/testthat/test-pca_sparse.R index 15a51136..54394ce4 100644 --- a/tests/testthat/test-pca_sparse.R +++ b/tests/testthat/test-pca_sparse.R @@ -131,9 +131,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = tr, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = tr[, -3]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = tr[, -3]) ) }) diff --git a/tests/testthat/test-pca_sparse_bayes.R b/tests/testthat/test-pca_sparse_bayes.R index 47647388..cd3ac0ed 100644 --- a/tests/testthat/test-pca_sparse_bayes.R +++ b/tests/testthat/test-pca_sparse_bayes.R @@ -140,9 +140,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = tr, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = tr[, -3]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = tr[, -3]) ) }) diff --git a/tests/testthat/test-pca_truncated.R b/tests/testthat/test-pca_truncated.R index 43073bb8..dbe8f54d 100644 --- 
a/tests/testthat/test-pca_truncated.R +++ b/tests/testthat/test-pca_truncated.R @@ -107,9 +107,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = tr, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = tr[, -3]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = tr[, -3]) ) }) @@ -219,4 +219,4 @@ test_that("printing", { expect_snapshot(print(rec)) expect_snapshot(prep(rec)) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-umap.R b/tests/testthat/test-umap.R index 8aa2f39d..ed0bb52a 100644 --- a/tests/testthat/test-umap.R +++ b/tests/testthat/test-umap.R @@ -257,9 +257,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = tr, verbose = FALSE) - expect_error( - bake(rec_trained, new_data = tr[, -4]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = tr[, -4]) ) }) diff --git a/tests/testthat/test-woe.R b/tests/testthat/test-woe.R index 2c0906de..22578f8f 100644 --- a/tests/testthat/test-woe.R +++ b/tests/testthat/test-woe.R @@ -281,9 +281,9 @@ test_that("bake method errors when needed non-standard role columns are missing" rec_trained <- prep(rec, training = credit_data, verbose = FALSE) ) - expect_error( - bake(rec_trained, new_data = credit_data[, -8]), - class = "new_data_missing_column" + expect_snapshot( + error = TRUE, + bake(rec_trained, new_data = credit_data[, -8]) ) }) From ec5e8f23ac93db04deb03ce6bb208daf79e88115 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 24 Sep 2024 15:29:15 -0700 Subject: [PATCH 2/3] expect_warning() -> expect_snapshot() --- tests/testthat/_snaps/discretize_cart.md | 8 ++++++++ tests/testthat/test-discretize_cart.R | 2 +- tests/testthat/test-woe.R | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git 
a/tests/testthat/_snaps/discretize_cart.md b/tests/testthat/_snaps/discretize_cart.md index b643b9cd..67f961bf 100644 --- a/tests/testthat/_snaps/discretize_cart.md +++ b/tests/testthat/_snaps/discretize_cart.md @@ -90,6 +90,14 @@ # bake method errors when needed non-standard role columns are missing + Code + rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE) + Condition + Warning: + `step_discretize_cart()` failed to find any meaningful splits for predictor 'z', which will not be binned. + +--- + Code bake(rec_trained, new_data = sim_tr_cls[, -1]) Condition diff --git a/tests/testthat/test-discretize_cart.R b/tests/testthat/test-discretize_cart.R index ae17b113..07de0218 100644 --- a/tests/testthat/test-discretize_cart.R +++ b/tests/testthat/test-discretize_cart.R @@ -218,7 +218,7 @@ test_that("bake method errors when needed non-standard role columns are missing" update_role(x, new_role = "potato") %>% update_role_requirements(role = "potato", bake = FALSE) - expect_warning( + expect_snapshot( rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE) ) diff --git a/tests/testthat/test-woe.R b/tests/testthat/test-woe.R index 22578f8f..badadcdd 100644 --- a/tests/testthat/test-woe.R +++ b/tests/testthat/test-woe.R @@ -147,7 +147,7 @@ test_that("add_woe do not accept dictionary with unexpected layout", { }) # test_that("add_woe warns user if the variable has too many levels", { -# expect_warning(credit_data %>% add_woe("Status", Expenses)) +# expect_snapshot(credit_data %>% add_woe("Status", Expenses)) # }) # step_woe ---------------------------------------------------------------- From 3d2bc646f795865fe831d7173201d2f4a0ac4a81 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 24 Sep 2024 15:43:36 -0700 Subject: [PATCH 3/3] expect_error(regex = NA) -> expect_no_error() --- tests/testthat/test-collapse_cart.R | 25 +++++++++-------------- tests/testthat/test-collapse_stringdist.R | 20 ++++++++---------- tests/testthat/test-discretize_cart.R | 
20 ++++++++---------- tests/testthat/test-embed.R | 5 ++--- tests/testthat/test-feature_hash.R | 9 ++++---- tests/testthat/test-pca_sparse.R | 5 ++--- tests/testthat/test-pca_sparse_bayes.R | 5 ++--- tests/testthat/test-pca_truncated.R | 5 ++--- tests/testthat/test-umap.R | 5 ++--- tests/testthat/test-woe.R | 5 ++--- 10 files changed, 42 insertions(+), 62 deletions(-) diff --git a/tests/testthat/test-collapse_cart.R b/tests/testthat/test-collapse_cart.R index 4b7f1990..f187b723 100644 --- a/tests/testthat/test-collapse_cart.R +++ b/tests/testthat/test-collapse_cart.R @@ -3,7 +3,7 @@ test_that("collapsing factors", { data(ames, package = "modeldata") ames$Sale_Price <- log10(ames$Sale_Price) - expect_error( + expect_no_error( { rec_1 <- recipe(Sale_Price ~ ., data = ames) %>% @@ -12,8 +12,7 @@ test_that("collapsing factors", { outcome = vars(Sale_Price) ) %>% prep() - }, - regex = NA + } ) expect_true(length(rec_1$steps[[1]]$results) == 1) @@ -35,7 +34,7 @@ test_that("collapsing factors", { ) ) - expect_error( + expect_no_error( { rec_2 <- recipe(Sale_Price ~ ., data = ames) %>% @@ -44,8 +43,7 @@ test_that("collapsing factors", { min_n = 100, cost_complexity = 0.1 ) %>% prep() - }, - regex = NA + } ) expect_true( @@ -60,7 +58,7 @@ test_that("failed collapsing", { # model fails ames$Sale_Price2 <- Inf - expect_error( + expect_no_error( { rec_3 <- recipe(Sale_Price2 ~ ., data = ames) %>% @@ -69,14 +67,13 @@ test_that("failed collapsing", { outcome = vars(Sale_Price2) ) %>% prep() - }, - regex = NA + } ) expect_true(length(rec_3$steps[[1]]$results) == 0) # too many splits - expect_error( + expect_no_error( { rec_4 <- recipe(Sale_Price ~ ., data = ames) %>% @@ -86,21 +83,19 @@ test_that("failed collapsing", { cost_complexity = 0, min_n = 1 ) %>% prep() - }, - regex = NA + } ) expect_true(length(rec_4$steps[[1]]$results) == 0) # too many splits - expect_error( + expect_no_error( { rec_5 <- recipe(Sale_Price ~ ., data = ames) %>% step_collapse_cart(Central_Air, outcome = 
vars(Sale_Price)) %>% prep() - }, - regex = NA + } ) expect_true(length(rec_5$steps[[1]]$results) == 0) diff --git a/tests/testthat/test-collapse_stringdist.R b/tests/testthat/test-collapse_stringdist.R index 66bb0d0d..1648d6c2 100644 --- a/tests/testthat/test-collapse_stringdist.R +++ b/tests/testthat/test-collapse_stringdist.R @@ -4,14 +4,13 @@ test_that("collapsing factors", { data(ames, package = "modeldata") - expect_error( + expect_no_error( { rec_1 <- recipe(Sale_Price ~ ., data = ames) %>% step_collapse_stringdist(MS_SubClass, distance = 5) %>% prep() - }, - regex = NA + } ) expect_true(length(rec_1$steps[[1]]$results) == 1) @@ -33,14 +32,13 @@ test_that("collapsing factors", { ) ) - expect_error( + expect_no_error( { rec_2 <- recipe(Sale_Price ~ ., data = ames) %>% step_collapse_stringdist(MS_SubClass, Overall_Cond, distance = 10) %>% prep() - }, - regex = NA + } ) expect_true(length(rec_2$steps[[1]]$results) == 2) @@ -165,14 +163,13 @@ test_that("failed collapsing", { data(ames, package = "modeldata") # too many splits - expect_error( + expect_no_error( { rec_4 <- recipe(Sale_Price ~ ., data = ames) %>% step_collapse_stringdist(MS_SubClass, distance = 0) %>% prep() - }, - regex = NA + } ) expect_equal( @@ -181,14 +178,13 @@ test_that("failed collapsing", { ) # too few splits - expect_error( + expect_no_error( { rec_5 <- recipe(Sale_Price ~ ., data = ames) %>% step_collapse_stringdist(MS_SubClass, distance = 10000) %>% prep() - }, - regex = NA + } ) expect_equal( diff --git a/tests/testthat/test-discretize_cart.R b/tests/testthat/test-discretize_cart.R index 07de0218..d4ef3670 100644 --- a/tests/testthat/test-discretize_cart.R +++ b/tests/testthat/test-discretize_cart.R @@ -17,7 +17,7 @@ mod <- rpart(y ~ x, data = sim_tr_reg) best_split <- unname(mod$splits[, "index"]) test_that("low-level binning for classification", { - expect_error( + expect_no_error( splits <- embed:::cart_binning( sim_tr_cls$x, @@ -26,8 +26,7 @@ test_that("low-level binning for 
classification", { cost_complexity = 0.01, tree_depth = 5, min_n = 10 - ), - regexp = NA + ) ) expect_equal(splits, best_split) @@ -47,7 +46,7 @@ test_that("low-level binning for classification", { }) test_that("low-level binning for regression", { - expect_error( + expect_no_error( splits <- embed:::cart_binning( sim_tr_reg$x, @@ -56,8 +55,7 @@ test_that("low-level binning for regression", { cost_complexity = 0.01, tree_depth = 5, min_n = 10 - ), - regexp = NA + ) ) expect_equal(splits, best_split) @@ -87,9 +85,8 @@ test_that("step function for classification", { expect_equal(names(cart_rec$steps[[1]]$rules), "x") expect_equal(cart_rec$steps[[1]]$rules$x, best_split) - expect_error( - cart_pred <- bake(cart_rec, sim_tr_cls[, -3]), - regexp = NA + expect_no_error( + cart_pred <- bake(cart_rec, sim_tr_cls[, -3]) ) expect_true(is.factor(cart_pred$x)) @@ -108,9 +105,8 @@ test_that("step function for regression", { expect_equal(names(cart_rec$steps[[1]]$rules), "x") expect_equal(cart_rec$steps[[1]]$rules$x, best_split) - expect_error( - cart_pred <- bake(cart_rec, sim_tr_reg[, -3]), - regexp = NA + expect_no_error( + cart_pred <- bake(cart_rec, sim_tr_reg[, -3]) ) expect_true(is.factor(cart_pred$x)) diff --git a/tests/testthat/test-embed.R b/tests/testthat/test-embed.R index 440af7f3..b6a27d95 100644 --- a/tests/testthat/test-embed.R +++ b/tests/testthat/test-embed.R @@ -448,9 +448,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { rec <- prep(rec) ) - expect_error( - bake(rec, new_data = ex_dat), - NA + expect_no_error( + bake(rec, new_data = ex_dat) ) }) diff --git a/tests/testthat/test-feature_hash.R b/tests/testthat/test-feature_hash.R index e128a8e6..d95eafc5 100644 --- a/tests/testthat/test-feature_hash.R +++ b/tests/testthat/test-feature_hash.R @@ -10,7 +10,7 @@ test_that("basic usage", { rec <- recipe(x1 ~ x3, data = ex_dat) %>% step_feature_hash(x3) - expect_error(rec_tr <- prep(rec), regex = NA) + expect_no_error(rec_tr <- prep(rec)) 
res_tr <- bake(rec_tr, new_data = NULL, dplyr::starts_with("x3")) @@ -64,7 +64,7 @@ test_that("basic usage - character strings", { rec <- recipe(x1 ~ x3, data = ex_dat) %>% step_feature_hash(x3) - expect_error(rec_tr <- prep(rec), regex = NA) + expect_no_error(rec_tr <- prep(rec)) res_tr <- bake(rec_tr, new_data = NULL, dplyr::starts_with("x3")) @@ -241,9 +241,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { rec <- prep(rec) ) - expect_error( - bake(rec, new_data = ex_dat), - NA + expect_no_error( + bake(rec, new_data = ex_dat) ) }) diff --git a/tests/testthat/test-pca_sparse.R b/tests/testthat/test-pca_sparse.R index 54394ce4..775424df 100644 --- a/tests/testthat/test-pca_sparse.R +++ b/tests/testthat/test-pca_sparse.R @@ -231,9 +231,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { rec <- prep(rec) ) - expect_error( - bake(rec, new_data = cells), - NA + expect_no_error( + bake(rec, new_data = cells) ) }) diff --git a/tests/testthat/test-pca_sparse_bayes.R b/tests/testthat/test-pca_sparse_bayes.R index cd3ac0ed..842961ab 100644 --- a/tests/testthat/test-pca_sparse_bayes.R +++ b/tests/testthat/test-pca_sparse_bayes.R @@ -240,9 +240,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { rec <- prep(rec) ) - expect_error( - bake(rec, new_data = cells), - NA + expect_no_error( + bake(rec, new_data = cells) ) }) diff --git a/tests/testthat/test-pca_truncated.R b/tests/testthat/test-pca_truncated.R index dbe8f54d..c47de411 100644 --- a/tests/testthat/test-pca_truncated.R +++ b/tests/testthat/test-pca_truncated.R @@ -207,9 +207,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { rec <- prep(rec) ) - expect_error( - bake(rec, new_data = cells), - NA + expect_no_error( + bake(rec, new_data = cells) ) }) diff --git a/tests/testthat/test-umap.R b/tests/testthat/test-umap.R index ed0bb52a..0428a235 100644 --- a/tests/testthat/test-umap.R +++ b/tests/testthat/test-umap.R @@ -344,9 +344,8 
@@ test_that("keep_original_cols - can prep recipes with it missing", { rec <- prep(rec) ) - expect_error( - bake(rec, new_data = mtcars), - NA + expect_no_error( + bake(rec, new_data = mtcars) ) }) diff --git a/tests/testthat/test-woe.R b/tests/testthat/test-woe.R index badadcdd..eca8cd12 100644 --- a/tests/testthat/test-woe.R +++ b/tests/testthat/test-woe.R @@ -376,9 +376,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { rec <- prep(rec) ) - expect_error( - bake(rec, new_data = ames), - NA + expect_no_error( + bake(rec, new_data = ames) ) })