From 7f29fa629a46d2f66e4d3f519737eb8e1d438a4f Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Tue, 24 Sep 2024 15:28:21 -0700
Subject: [PATCH 1/3] no more class-based error checking

---
 DESCRIPTION                                  |   4 +-
 tests/testthat/_snaps/collapse_cart.md       |   8 +
 tests/testthat/_snaps/collapse_stringdist.md |   8 +
 tests/testthat/_snaps/discretize_cart.md     |   8 +
 tests/testthat/_snaps/discretize_xgb.md      | 146 ++++++++++---------
 tests/testthat/_snaps/embed.md               |   8 +
 tests/testthat/_snaps/feature_hash.md        |   8 +
 tests/testthat/_snaps/lencode_bayes.md       |   8 +
 tests/testthat/_snaps/lencode_glm.md         |   8 +
 tests/testthat/_snaps/lencode_mixed.md       |   8 +
 tests/testthat/_snaps/pca_sparse.md          |   8 +
 tests/testthat/_snaps/pca_sparse_bayes.md    |   8 +
 tests/testthat/_snaps/pca_truncated.md       |   8 +
 tests/testthat/_snaps/woe.md                 |   8 +
 tests/testthat/test-collapse_cart.R          |   8 +-
 tests/testthat/test-collapse_stringdist.R    |   8 +-
 tests/testthat/test-discretize_cart.R        |   8 +-
 tests/testthat/test-discretize_xgb.R         |  12 +-
 tests/testthat/test-embed.R                  |   6 +-
 tests/testthat/test-feature_hash.R           |   8 +-
 tests/testthat/test-lencode_bayes.R          |   6 +-
 tests/testthat/test-lencode_glm.R            |   8 +-
 tests/testthat/test-lencode_mixed.R          |   8 +-
 tests/testthat/test-pca_sparse.R             |   6 +-
 tests/testthat/test-pca_sparse_bayes.R       |   6 +-
 tests/testthat/test-pca_truncated.R          |   8 +-
 tests/testthat/test-umap.R                   |   6 +-
 tests/testthat/test-woe.R                    |   6 +-
 28 files changed, 228 insertions(+), 122 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8d0cdf63..e1ccc3ac 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -18,7 +18,7 @@ URL: https://embed.tidymodels.org, https://github.com/tidymodels/embed
 BugReports: https://github.com/tidymodels/embed/issues
 Depends: 
     R (>= 3.6),
-    recipes (>= 1.0.7)
+    recipes (>= 1.1.0.9000)
 Imports:
     glue,
     dplyr (>= 1.1.0),
@@ -52,6 +52,8 @@ Suggests:
     testthat (>= 3.0.0),
     VBsparsePCA,
     xgboost
+Remotes:
+    tidymodels/recipes
 ByteCompile: true
 Config/Needs/website: tidymodels, ggiraph, tidyverse/tidytemplate, reticulate
 Config/testthat/edition: 3
diff --git a/tests/testthat/_snaps/collapse_cart.md b/tests/testthat/_snaps/collapse_cart.md
index abdc3d6e..81f830ea 100644
--- a/tests/testthat/_snaps/collapse_cart.md
+++ b/tests/testthat/_snaps/collapse_cart.md
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = ames[, -1])
+    Condition
+      Error in `step_collapse_cart()`:
+      ! The following required column is missing from `new_data`: MS_SubClass.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/collapse_stringdist.md b/tests/testthat/_snaps/collapse_stringdist.md
index 2c13d3a8..fbddb1f4 100644
--- a/tests/testthat/_snaps/collapse_stringdist.md
+++ b/tests/testthat/_snaps/collapse_stringdist.md
@@ -1,3 +1,11 @@
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = ames[, -1])
+    Condition
+      Error in `step_collapse_stringdist()`:
+      ! The following required column is missing from `new_data`: MS_SubClass.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/discretize_cart.md b/tests/testthat/_snaps/discretize_cart.md
index c342dae5..b643b9cd 100644
--- a/tests/testthat/_snaps/discretize_cart.md
+++ b/tests/testthat/_snaps/discretize_cart.md
@@ -88,6 +88,14 @@
       -- Operations 
       * Discretizing variables using CART: x and z | Trained, weighted
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = sim_tr_cls[, -1])
+    Condition
+      Error in `step_discretize_cart()`:
+      ! The following required column is missing from `new_data`: x.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/discretize_xgb.md b/tests/testthat/_snaps/discretize_xgb.md
index 5d9ca324..8aa8444c 100644
--- a/tests/testthat/_snaps/discretize_xgb.md
+++ b/tests/testthat/_snaps/discretize_xgb.md
@@ -4,35 +4,35 @@
       xgboost
     Output
       ##### xgb.Booster
-      raw: 74.2 Kb 
+      raw: 74.2 Kb
       call:
-        xgboost::xgb.train(params = .params, data = .train, nrounds = 100, 
-          watchlist = list(train = .train, test = .test), verbose = 0, 
-          early_stopping_rounds = 10, tree_method = "hist", objective = .objective, 
-          nthread = 1)
+      xgboost::xgb.train(params = .params, data = .train, nrounds = 100,
+      watchlist = list(train = .train, test = .test), verbose = 0,
+      early_stopping_rounds = 10, tree_method = "hist", objective = .objective,
+      nthread = 1)
       params (as set within xgb.train):
-        eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "binary:logistic", nthread = "1", validate_parameters = "TRUE"
+      eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "binary:logistic", nthread = "1", validate_parameters = "TRUE"
       xgb.attributes:
-        best_iteration, best_msg, best_ntreelimit, best_score, niter
+      best_iteration, best_msg, best_ntreelimit, best_score, niter
       callbacks:
-        cb.evaluation.log()
-        cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, 
-          verbose = verbose)
-      # of features: 13 
+      cb.evaluation.log()
+      cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize,
+      verbose = verbose)
+      # of features: 13
       niter: 96
-      best_iteration : 86 
-      best_ntreelimit : 86 
-      best_score : 0.4421503 
-      best_msg : [86]	train-logloss:0.417583	test-logloss:0.442150 
-      nfeatures : 13 
+      best_iteration : 86
+      best_ntreelimit : 86
+      best_score : 0.4421503
+      best_msg : [86]	train-logloss:0.417583	test-logloss:0.442150
+      nfeatures : 13
       evaluation_log:
-           iter train_logloss test_logloss
-          <num>         <num>        <num>
-              1     0.6279229    0.6303495
-              2     0.5869984    0.5894989
-      ---                                 
-             95     0.4157892    0.4425857
-             96     0.4156102    0.4432699
+      iter train_logloss test_logloss
+      <num>         <num>        <num>
+      1     0.6279229    0.6303495
+      2     0.5869984    0.5894989
+      ---           ---          ---
+      95     0.4157892    0.4425857
+      96     0.4156102    0.4432699
 
 # run_xgboost for multi-classification
 
@@ -40,35 +40,35 @@
       xgboost
     Output
       ##### xgb.Booster
-      raw: 149.7 Kb 
+      raw: 149.7 Kb
       call:
-        xgboost::xgb.train(params = .params, data = .train, nrounds = 100, 
-          watchlist = list(train = .train, test = .test), verbose = 0, 
-          early_stopping_rounds = 10, tree_method = "hist", objective = .objective, 
-          nthread = 1)
+      xgboost::xgb.train(params = .params, data = .train, nrounds = 100,
+      watchlist = list(train = .train, test = .test), verbose = 0,
+      early_stopping_rounds = 10, tree_method = "hist", objective = .objective,
+      nthread = 1)
       params (as set within xgb.train):
-        eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", num_class = "6", tree_method = "hist", objective = "multi:softprob", nthread = "1", validate_parameters = "TRUE"
+      eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", num_class = "6", tree_method = "hist", objective = "multi:softprob", nthread = "1", validate_parameters = "TRUE"
       xgb.attributes:
-        best_iteration, best_msg, best_ntreelimit, best_score, niter
+      best_iteration, best_msg, best_ntreelimit, best_score, niter
       callbacks:
-        cb.evaluation.log()
-        cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, 
-          verbose = verbose)
-      # of features: 30 
+      cb.evaluation.log()
+      cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize,
+      verbose = verbose)
+      # of features: 30
       niter: 33
-      best_iteration : 23 
-      best_ntreelimit : 23 
-      best_score : 1.246428 
-      best_msg : [23]	train-mlogloss:1.178121	test-mlogloss:1.246428 
-      nfeatures : 30 
+      best_iteration : 23
+      best_ntreelimit : 23
+      best_score : 1.246428
+      best_msg : [23]	train-mlogloss:1.178121	test-mlogloss:1.246428
+      nfeatures : 30
       evaluation_log:
-           iter train_mlogloss test_mlogloss
-          <num>          <num>         <num>
-              1       1.623174      1.631783
-              2       1.515108      1.531188
-      ---                                   
-             32       1.159813      1.249701
-             33       1.158088      1.250462
+      iter train_mlogloss test_mlogloss
+      <num>          <num>         <num>
+      1       1.623174      1.631783
+      2       1.515108      1.531188
+      ---            ---           ---
+      32       1.159813      1.249701
+      33       1.158088      1.250462
 
 # run_xgboost for regression
 
@@ -76,35 +76,35 @@
       xgboost
     Output
       ##### xgb.Booster
-      raw: 40.2 Kb 
+      raw: 40.2 Kb
       call:
-        xgboost::xgb.train(params = .params, data = .train, nrounds = 100, 
-          watchlist = list(train = .train, test = .test), verbose = 0, 
-          early_stopping_rounds = 10, tree_method = "hist", objective = .objective, 
-          nthread = 1)
+      xgboost::xgb.train(params = .params, data = .train, nrounds = 100,
+      watchlist = list(train = .train, test = .test), verbose = 0,
+      early_stopping_rounds = 10, tree_method = "hist", objective = .objective,
+      nthread = 1)
       params (as set within xgb.train):
-        eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE"
+      eta = "0.3", max_bin = "10", max_depth = "1", min_child_weight = "5", tree_method = "hist", objective = "reg:squarederror", nthread = "1", validate_parameters = "TRUE"
       xgb.attributes:
-        best_iteration, best_msg, best_ntreelimit, best_score, niter
+      best_iteration, best_msg, best_ntreelimit, best_score, niter
       callbacks:
-        cb.evaluation.log()
-        cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize, 
-          verbose = verbose)
-      # of features: 73 
+      cb.evaluation.log()
+      cb.early.stop(stopping_rounds = early_stopping_rounds, maximize = maximize,
+      verbose = verbose)
+      # of features: 73
       niter: 50
-      best_iteration : 40 
-      best_ntreelimit : 40 
-      best_score : 0.1165337 
-      best_msg : [40]	train-rmse:0.064010	test-rmse:0.116534 
-      nfeatures : 73 
+      best_iteration : 40
+      best_ntreelimit : 40
+      best_score : 0.1165337
+      best_msg : [40]	train-rmse:0.064010	test-rmse:0.116534
+      nfeatures : 73
       evaluation_log:
-           iter train_rmse test_rmse
-          <num>      <num>     <num>
-              1 3.31007782 3.3068878
-              2 2.31969213 2.3262197
-      ---                           
-             49 0.06207940 0.1175223
-             50 0.06191289 0.1188113
+      iter train_rmse test_rmse
+      <num>      <num>     <num>
+      1 3.31007782 3.3068878
+      2 2.31969213 2.3262197
+      ---        ---       ---
+      49 0.06207940 0.1175223
+      50 0.06191289 0.1188113
 
 # xgb_binning for classification
 
@@ -292,6 +292,14 @@
       -- Operations 
       * Discretizing variables using xgboost: x and z | Trained, weighted
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = sim_tr_cls[, -1])
+    Condition
+      Error in `step_discretize_xgb()`:
+      ! The following required column is missing from `new_data`: x.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/embed.md b/tests/testthat/_snaps/embed.md
index 1bf18646..46778ab1 100644
--- a/tests/testthat/_snaps/embed.md
+++ b/tests/testthat/_snaps/embed.md
@@ -39,6 +39,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `x3_embed_1`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = ex_dat[, -3])
+    Condition
+      Error in `step_embed()`:
+      ! The following required column is missing from `new_data`: x3.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/feature_hash.md b/tests/testthat/_snaps/feature_hash.md
index 54a69451..2862e6fb 100644
--- a/tests/testthat/_snaps/feature_hash.md
+++ b/tests/testthat/_snaps/feature_hash.md
@@ -18,6 +18,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `x3_hash_01`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = ex_dat[, -3])
+    Condition
+      Error in `step_feature_hash()`:
+      ! The following required column is missing from `new_data`: x3.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/lencode_bayes.md b/tests/testthat/_snaps/lencode_bayes.md
index 96664736..e909b323 100644
--- a/tests/testthat/_snaps/lencode_bayes.md
+++ b/tests/testthat/_snaps/lencode_bayes.md
@@ -101,6 +101,14 @@
       -- Operations 
       * Linear embedding for factors via Bayesian GLM for: x3 | Trained, weighted
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = ex_dat[, -3])
+    Condition
+      Error in `step_lencode_bayes()`:
+      ! The following required column is missing from `new_data`: x3.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/lencode_glm.md b/tests/testthat/_snaps/lencode_glm.md
index 2e54662a..9105a6da 100644
--- a/tests/testthat/_snaps/lencode_glm.md
+++ b/tests/testthat/_snaps/lencode_glm.md
@@ -54,6 +54,14 @@
       -- Operations 
       * Linear embedding for factors via GLM for: x3 | Trained, weighted
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = ex_dat[, -3])
+    Condition
+      Error in `step_lencode_glm()`:
+      ! The following required column is missing from `new_data`: x3.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/lencode_mixed.md b/tests/testthat/_snaps/lencode_mixed.md
index c5333520..58dded21 100644
--- a/tests/testthat/_snaps/lencode_mixed.md
+++ b/tests/testthat/_snaps/lencode_mixed.md
@@ -49,6 +49,14 @@
       -- Operations 
       * Linear embedding for factors via mixed effects for: x3 | Trained, weighted
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = ex_dat[, -3])
+    Condition
+      Error in `step_lencode_mixed()`:
+      ! The following required column is missing from `new_data`: x3.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/pca_sparse.md b/tests/testthat/_snaps/pca_sparse.md
index 3fab7c00..360049ed 100644
--- a/tests/testthat/_snaps/pca_sparse.md
+++ b/tests/testthat/_snaps/pca_sparse.md
@@ -26,6 +26,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `PC1`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = tr[, -3])
+    Condition
+      Error in `step_pca_sparse()`:
+      ! The following required column is missing from `new_data`: avg_inten_ch_1.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/pca_sparse_bayes.md b/tests/testthat/_snaps/pca_sparse_bayes.md
index 6f5bbf07..26c878b4 100644
--- a/tests/testthat/_snaps/pca_sparse_bayes.md
+++ b/tests/testthat/_snaps/pca_sparse_bayes.md
@@ -26,6 +26,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `PC1`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = tr[, -3])
+    Condition
+      Error in `step_pca_sparse_bayes()`:
+      ! The following required column is missing from `new_data`: avg_inten_ch_1.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/pca_truncated.md b/tests/testthat/_snaps/pca_truncated.md
index 0724e71a..1168e1b4 100644
--- a/tests/testthat/_snaps/pca_truncated.md
+++ b/tests/testthat/_snaps/pca_truncated.md
@@ -8,6 +8,14 @@
       ! Name collision occurred. The following variable names already exist:
       * `PC1`
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = tr[, -3])
+    Condition
+      Error in `step_pca_truncated()`:
+      ! The following required column is missing from `new_data`: avg_inten_ch_1.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/_snaps/woe.md b/tests/testthat/_snaps/woe.md
index 2856081b..2a52af88 100644
--- a/tests/testthat/_snaps/woe.md
+++ b/tests/testthat/_snaps/woe.md
@@ -109,6 +109,14 @@
       Caused by error in `dictionary()`:
       ! 'outcome' must have exactly 2 categories (has 3)
 
+# bake method errors when needed non-standard role columns are missing
+
+    Code
+      bake(rec_trained, new_data = credit_data[, -8])
+    Condition
+      Error in `step_woe()`:
+      ! The following required column is missing from `new_data`: Job.
+
 # empty printing
 
     Code
diff --git a/tests/testthat/test-collapse_cart.R b/tests/testthat/test-collapse_cart.R
index 19a56c44..4b7f1990 100644
--- a/tests/testthat/test-collapse_cart.R
+++ b/tests/testthat/test-collapse_cart.R
@@ -119,9 +119,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = ames, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = ames[, -1]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = ames[, -1])
   )
 })
 
@@ -171,4 +171,4 @@ test_that("printing", {
   
   expect_snapshot(print(rec))
   expect_snapshot(prep(rec))
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-collapse_stringdist.R b/tests/testthat/test-collapse_stringdist.R
index 464a3c50..66bb0d0d 100644
--- a/tests/testthat/test-collapse_stringdist.R
+++ b/tests/testthat/test-collapse_stringdist.R
@@ -212,9 +212,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = ames, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = ames[, -1]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = ames[, -1])
   )
 })
 
@@ -266,4 +266,4 @@ test_that("printing", {
   
   expect_snapshot(print(rec))
   expect_snapshot(prep(rec))
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-discretize_cart.R b/tests/testthat/test-discretize_cart.R
index 67e91d4c..ae17b113 100644
--- a/tests/testthat/test-discretize_cart.R
+++ b/tests/testthat/test-discretize_cart.R
@@ -222,9 +222,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
     rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE)
   )
   
-  expect_error(
-    bake(rec_trained, new_data = sim_tr_cls[, -1]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = sim_tr_cls[, -1])
   )
 })
 
@@ -288,4 +288,4 @@ test_that("tunable is setup to works with extract_parameter_set_dials", {
   
   expect_s3_class(params, "parameters")
   expect_identical(nrow(params), 3L)
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-discretize_xgb.R b/tests/testthat/test-discretize_xgb.R
index 865df140..ee94576a 100644
--- a/tests/testthat/test-discretize_xgb.R
+++ b/tests/testthat/test-discretize_xgb.R
@@ -121,7 +121,7 @@ test_that("run_xgboost for classification", {
     .num_class = NA
   )
 
-  expect_snapshot(xgboost)
+  expect_snapshot(xgboost, transform = trimws)
   expect_equal(length(xgboost$params), 8)
   expect_equal(xgboost$nfeatures, 13)
   expect_equal(xgboost$params$tree_method, "hist")
@@ -142,7 +142,7 @@ test_that("run_xgboost for multi-classification", {
     .objective = "multi:softprob"
   )
 
-  expect_snapshot(xgboost)
+  expect_snapshot(xgboost, transform = trimws)
   expect_equal(length(xgboost$params), 9)
   expect_equal(xgboost$nfeatures, 30)
   expect_equal(xgboost$params$tree_method, "hist")
@@ -163,7 +163,7 @@ test_that("run_xgboost for regression", {
     .num_class = NA
   )
 
-  expect_snapshot(xgboost)
+  expect_snapshot(xgboost, transform = trimws)
   expect_true(length(xgboost$params) > 1)
   expect_true(xgboost$nfeatures > 1)
   expect_equal(xgboost$params$tree_method, "hist")
@@ -639,9 +639,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = sim_tr_cls[, -1]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = sim_tr_cls[, -1])
   )
 })
 
diff --git a/tests/testthat/test-embed.R b/tests/testthat/test-embed.R
index 369c085d..440af7f3 100644
--- a/tests/testthat/test-embed.R
+++ b/tests/testthat/test-embed.R
@@ -350,9 +350,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = ex_dat, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = ex_dat[, -3]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = ex_dat[, -3])
   )
 })
 
diff --git a/tests/testthat/test-feature_hash.R b/tests/testthat/test-feature_hash.R
index d45992ca..e128a8e6 100644
--- a/tests/testthat/test-feature_hash.R
+++ b/tests/testthat/test-feature_hash.R
@@ -137,9 +137,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = ex_dat, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = ex_dat[, -3]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = ex_dat[, -3])
   )
 })
 
@@ -257,4 +257,4 @@ test_that("printing", {
   
   expect_snapshot(print(rec))
   expect_snapshot(prep(rec))
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-lencode_bayes.R b/tests/testthat/test-lencode_bayes.R
index d777a3fe..5e7c5812 100644
--- a/tests/testthat/test-lencode_bayes.R
+++ b/tests/testthat/test-lencode_bayes.R
@@ -425,9 +425,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = ex_dat, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = ex_dat[, -3]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = ex_dat[, -3])
   )
 })
 
diff --git a/tests/testthat/test-lencode_glm.R b/tests/testthat/test-lencode_glm.R
index c6b8d4a9..5115ed4c 100644
--- a/tests/testthat/test-lencode_glm.R
+++ b/tests/testthat/test-lencode_glm.R
@@ -276,9 +276,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = ex_dat, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = ex_dat[, -3]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = ex_dat[, -3])
   )
 })
 
@@ -330,4 +330,4 @@ test_that("printing", {
 
   expect_snapshot(print(rec))
   expect_snapshot(prep(rec))
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-lencode_mixed.R b/tests/testthat/test-lencode_mixed.R
index 5aa63a56..3cffa109 100644
--- a/tests/testthat/test-lencode_mixed.R
+++ b/tests/testthat/test-lencode_mixed.R
@@ -301,9 +301,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = ex_dat, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = ex_dat[, -3]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = ex_dat[, -3])
   )
 })
 
@@ -358,4 +358,4 @@ test_that("printing", {
 
   expect_snapshot(print(rec))
   expect_snapshot(prep(rec))
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-pca_sparse.R b/tests/testthat/test-pca_sparse.R
index 15a51136..54394ce4 100644
--- a/tests/testthat/test-pca_sparse.R
+++ b/tests/testthat/test-pca_sparse.R
@@ -131,9 +131,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = tr, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = tr[, -3]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = tr[, -3])
   )
 })
 
diff --git a/tests/testthat/test-pca_sparse_bayes.R b/tests/testthat/test-pca_sparse_bayes.R
index 47647388..cd3ac0ed 100644
--- a/tests/testthat/test-pca_sparse_bayes.R
+++ b/tests/testthat/test-pca_sparse_bayes.R
@@ -140,9 +140,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = tr, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = tr[, -3]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = tr[, -3])
   )
 })
 
diff --git a/tests/testthat/test-pca_truncated.R b/tests/testthat/test-pca_truncated.R
index 43073bb8..dbe8f54d 100644
--- a/tests/testthat/test-pca_truncated.R
+++ b/tests/testthat/test-pca_truncated.R
@@ -107,9 +107,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = tr, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = tr[, -3]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = tr[, -3])
   )
 })
 
@@ -219,4 +219,4 @@ test_that("printing", {
   
   expect_snapshot(print(rec))
   expect_snapshot(prep(rec))
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-umap.R b/tests/testthat/test-umap.R
index 8aa2f39d..ed0bb52a 100644
--- a/tests/testthat/test-umap.R
+++ b/tests/testthat/test-umap.R
@@ -257,9 +257,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
   
   rec_trained <- prep(rec, training = tr, verbose = FALSE)
   
-  expect_error(
-    bake(rec_trained, new_data = tr[, -4]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = tr[, -4])
   )
 })
 
diff --git a/tests/testthat/test-woe.R b/tests/testthat/test-woe.R
index 2c0906de..22578f8f 100644
--- a/tests/testthat/test-woe.R
+++ b/tests/testthat/test-woe.R
@@ -281,9 +281,9 @@ test_that("bake method errors when needed non-standard role columns are missing"
     rec_trained <- prep(rec, training = credit_data, verbose = FALSE)
   )
   
-  expect_error(
-    bake(rec_trained, new_data = credit_data[, -8]),
-    class = "new_data_missing_column"
+  expect_snapshot(
+    error = TRUE,
+    bake(rec_trained, new_data = credit_data[, -8])
   )
 })
 

From ec5e8f23ac93db04deb03ce6bb208daf79e88115 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Tue, 24 Sep 2024 15:29:15 -0700
Subject: [PATCH 2/3] expect_warning() -> expect_snapshot()

---
 tests/testthat/_snaps/discretize_cart.md | 8 ++++++++
 tests/testthat/test-discretize_cart.R    | 2 +-
 tests/testthat/test-woe.R                | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/_snaps/discretize_cart.md b/tests/testthat/_snaps/discretize_cart.md
index b643b9cd..67f961bf 100644
--- a/tests/testthat/_snaps/discretize_cart.md
+++ b/tests/testthat/_snaps/discretize_cart.md
@@ -90,6 +90,14 @@
 
 # bake method errors when needed non-standard role columns are missing
 
+    Code
+      rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE)
+    Condition
+      Warning:
+      `step_discretize_cart()` failed to find any meaningful splits for predictor 'z', which will not be binned.
+
+---
+
     Code
       bake(rec_trained, new_data = sim_tr_cls[, -1])
     Condition
diff --git a/tests/testthat/test-discretize_cart.R b/tests/testthat/test-discretize_cart.R
index ae17b113..07de0218 100644
--- a/tests/testthat/test-discretize_cart.R
+++ b/tests/testthat/test-discretize_cart.R
@@ -218,7 +218,7 @@ test_that("bake method errors when needed non-standard role columns are missing"
     update_role(x, new_role = "potato") %>%
     update_role_requirements(role = "potato", bake = FALSE)
   
-  expect_warning(
+  expect_snapshot(
     rec_trained <- prep(rec, training = sim_tr_cls, verbose = FALSE)
   )
   
diff --git a/tests/testthat/test-woe.R b/tests/testthat/test-woe.R
index 22578f8f..badadcdd 100644
--- a/tests/testthat/test-woe.R
+++ b/tests/testthat/test-woe.R
@@ -147,7 +147,7 @@ test_that("add_woe do not accept dictionary with unexpected layout", {
 })
 
 # test_that("add_woe warns user if the variable has too many levels", {
-#   expect_warning(credit_data %>% add_woe("Status", Expenses))
+#   expect_snapshot(credit_data %>% add_woe("Status", Expenses))
 # })
 
 # step_woe ----------------------------------------------------------------

From 3d2bc646f795865fe831d7173201d2f4a0ac4a81 Mon Sep 17 00:00:00 2001
From: Emil Hvitfeldt <emilhhvitfeldt@gmail.com>
Date: Tue, 24 Sep 2024 15:43:36 -0700
Subject: [PATCH 3/3] expect_error(regex = NA) -> expect_no_error()

---
 tests/testthat/test-collapse_cart.R       | 25 +++++++++--------------
 tests/testthat/test-collapse_stringdist.R | 20 ++++++++----------
 tests/testthat/test-discretize_cart.R     | 20 ++++++++----------
 tests/testthat/test-embed.R               |  5 ++---
 tests/testthat/test-feature_hash.R        |  9 ++++----
 tests/testthat/test-pca_sparse.R          |  5 ++---
 tests/testthat/test-pca_sparse_bayes.R    |  5 ++---
 tests/testthat/test-pca_truncated.R       |  5 ++---
 tests/testthat/test-umap.R                |  5 ++---
 tests/testthat/test-woe.R                 |  5 ++---
 10 files changed, 42 insertions(+), 62 deletions(-)

diff --git a/tests/testthat/test-collapse_cart.R b/tests/testthat/test-collapse_cart.R
index 4b7f1990..f187b723 100644
--- a/tests/testthat/test-collapse_cart.R
+++ b/tests/testthat/test-collapse_cart.R
@@ -3,7 +3,7 @@ test_that("collapsing factors", {
   data(ames, package = "modeldata")
   ames$Sale_Price <- log10(ames$Sale_Price)
 
-  expect_error(
+  expect_no_error(
     {
       rec_1 <-
         recipe(Sale_Price ~ ., data = ames) %>%
@@ -12,8 +12,7 @@ test_that("collapsing factors", {
           outcome = vars(Sale_Price)
         ) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_true(length(rec_1$steps[[1]]$results) == 1)
@@ -35,7 +34,7 @@ test_that("collapsing factors", {
     )
   )
 
-  expect_error(
+  expect_no_error(
     {
       rec_2 <-
         recipe(Sale_Price ~ ., data = ames) %>%
@@ -44,8 +43,7 @@ test_that("collapsing factors", {
           min_n = 100, cost_complexity = 0.1
         ) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_true(
@@ -60,7 +58,7 @@ test_that("failed collapsing", {
 
   # model fails
   ames$Sale_Price2 <- Inf
-  expect_error(
+  expect_no_error(
     {
       rec_3 <-
         recipe(Sale_Price2 ~ ., data = ames) %>%
@@ -69,14 +67,13 @@ test_that("failed collapsing", {
           outcome = vars(Sale_Price2)
         ) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_true(length(rec_3$steps[[1]]$results) == 0)
 
   # too many splits
-  expect_error(
+  expect_no_error(
     {
       rec_4 <-
         recipe(Sale_Price ~ ., data = ames) %>%
@@ -86,21 +83,19 @@ test_that("failed collapsing", {
           cost_complexity = 0, min_n = 1
         ) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_true(length(rec_4$steps[[1]]$results) == 0)
 
   # too many splits
-  expect_error(
+  expect_no_error(
     {
       rec_5 <-
         recipe(Sale_Price ~ ., data = ames) %>%
         step_collapse_cart(Central_Air, outcome = vars(Sale_Price)) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_true(length(rec_5$steps[[1]]$results) == 0)
diff --git a/tests/testthat/test-collapse_stringdist.R b/tests/testthat/test-collapse_stringdist.R
index 66bb0d0d..1648d6c2 100644
--- a/tests/testthat/test-collapse_stringdist.R
+++ b/tests/testthat/test-collapse_stringdist.R
@@ -4,14 +4,13 @@ test_that("collapsing factors", {
   
   data(ames, package = "modeldata")
 
-  expect_error(
+  expect_no_error(
     {
       rec_1 <-
         recipe(Sale_Price ~ ., data = ames) %>%
         step_collapse_stringdist(MS_SubClass, distance = 5) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_true(length(rec_1$steps[[1]]$results) == 1)
@@ -33,14 +32,13 @@ test_that("collapsing factors", {
     )
   )
 
-  expect_error(
+  expect_no_error(
     {
       rec_2 <-
         recipe(Sale_Price ~ ., data = ames) %>%
         step_collapse_stringdist(MS_SubClass, Overall_Cond, distance = 10) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_true(length(rec_2$steps[[1]]$results) == 2)
@@ -165,14 +163,13 @@ test_that("failed collapsing", {
   data(ames, package = "modeldata")
 
   # too many splits
-  expect_error(
+  expect_no_error(
     {
       rec_4 <-
         recipe(Sale_Price ~ ., data = ames) %>%
         step_collapse_stringdist(MS_SubClass, distance = 0) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_equal(
@@ -181,14 +178,13 @@ test_that("failed collapsing", {
   )
 
   # too few splits
-  expect_error(
+  expect_no_error(
     {
       rec_5 <-
         recipe(Sale_Price ~ ., data = ames) %>%
         step_collapse_stringdist(MS_SubClass, distance = 10000) %>%
         prep()
-    },
-    regex = NA
+    }
   )
 
   expect_equal(
diff --git a/tests/testthat/test-discretize_cart.R b/tests/testthat/test-discretize_cart.R
index 07de0218..d4ef3670 100644
--- a/tests/testthat/test-discretize_cart.R
+++ b/tests/testthat/test-discretize_cart.R
@@ -17,7 +17,7 @@ mod <- rpart(y ~ x, data = sim_tr_reg)
 best_split <- unname(mod$splits[, "index"])
 
 test_that("low-level binning for classification", {
-  expect_error(
+  expect_no_error(
     splits <-
       embed:::cart_binning(
         sim_tr_cls$x,
@@ -26,8 +26,7 @@ test_that("low-level binning for classification", {
         cost_complexity = 0.01,
         tree_depth = 5,
         min_n = 10
-      ),
-    regexp = NA
+      )
   )
   expect_equal(splits, best_split)
 
@@ -47,7 +46,7 @@ test_that("low-level binning for classification", {
 })
 
 test_that("low-level binning for regression", {
-  expect_error(
+  expect_no_error(
     splits <-
       embed:::cart_binning(
         sim_tr_reg$x,
@@ -56,8 +55,7 @@ test_that("low-level binning for regression", {
         cost_complexity = 0.01,
         tree_depth = 5,
         min_n = 10
-      ),
-    regexp = NA
+      )
   )
   expect_equal(splits, best_split)
 
@@ -87,9 +85,8 @@ test_that("step function for classification", {
   expect_equal(names(cart_rec$steps[[1]]$rules), "x")
   expect_equal(cart_rec$steps[[1]]$rules$x, best_split)
 
-  expect_error(
-    cart_pred <- bake(cart_rec, sim_tr_cls[, -3]),
-    regexp = NA
+  expect_no_error(
+    cart_pred <- bake(cart_rec, sim_tr_cls[, -3])
   )
 
   expect_true(is.factor(cart_pred$x))
@@ -108,9 +105,8 @@ test_that("step function for regression", {
   expect_equal(names(cart_rec$steps[[1]]$rules), "x")
   expect_equal(cart_rec$steps[[1]]$rules$x, best_split)
 
-  expect_error(
-    cart_pred <- bake(cart_rec, sim_tr_reg[, -3]),
-    regexp = NA
+  expect_no_error(
+    cart_pred <- bake(cart_rec, sim_tr_reg[, -3])
   )
 
   expect_true(is.factor(cart_pred$x))
diff --git a/tests/testthat/test-embed.R b/tests/testthat/test-embed.R
index 440af7f3..b6a27d95 100644
--- a/tests/testthat/test-embed.R
+++ b/tests/testthat/test-embed.R
@@ -448,9 +448,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {
     rec <- prep(rec)
   )
   
-  expect_error(
-    bake(rec, new_data = ex_dat),
-    NA
+  expect_no_error(
+    bake(rec, new_data = ex_dat)
   )
 })
 
diff --git a/tests/testthat/test-feature_hash.R b/tests/testthat/test-feature_hash.R
index e128a8e6..d95eafc5 100644
--- a/tests/testthat/test-feature_hash.R
+++ b/tests/testthat/test-feature_hash.R
@@ -10,7 +10,7 @@ test_that("basic usage", {
   rec <- recipe(x1 ~ x3, data = ex_dat) %>%
     step_feature_hash(x3)
 
-  expect_error(rec_tr <- prep(rec), regex = NA)
+  expect_no_error(rec_tr <- prep(rec))
 
   res_tr <- bake(rec_tr, new_data = NULL, dplyr::starts_with("x3"))
 
@@ -64,7 +64,7 @@ test_that("basic usage - character strings", {
   rec <- recipe(x1 ~ x3, data = ex_dat) %>%
     step_feature_hash(x3)
 
-  expect_error(rec_tr <- prep(rec), regex = NA)
+  expect_no_error(rec_tr <- prep(rec))
 
   res_tr <- bake(rec_tr, new_data = NULL, dplyr::starts_with("x3"))
 
@@ -241,9 +241,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {
     rec <- prep(rec)
   )
   
-  expect_error(
-    bake(rec, new_data = ex_dat),
-    NA
+  expect_no_error(
+    bake(rec, new_data = ex_dat)
   )
 })
 
diff --git a/tests/testthat/test-pca_sparse.R b/tests/testthat/test-pca_sparse.R
index 54394ce4..775424df 100644
--- a/tests/testthat/test-pca_sparse.R
+++ b/tests/testthat/test-pca_sparse.R
@@ -231,9 +231,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {
     rec <- prep(rec)
   )
   
-  expect_error(
-    bake(rec, new_data = cells),
-    NA
+  expect_no_error(
+    bake(rec, new_data = cells)
   )
 })
 
diff --git a/tests/testthat/test-pca_sparse_bayes.R b/tests/testthat/test-pca_sparse_bayes.R
index cd3ac0ed..842961ab 100644
--- a/tests/testthat/test-pca_sparse_bayes.R
+++ b/tests/testthat/test-pca_sparse_bayes.R
@@ -240,9 +240,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {
     rec <- prep(rec)
   )
   
-  expect_error(
-    bake(rec, new_data = cells),
-    NA
+  expect_no_error(
+    bake(rec, new_data = cells)
   )
 })
 
diff --git a/tests/testthat/test-pca_truncated.R b/tests/testthat/test-pca_truncated.R
index dbe8f54d..c47de411 100644
--- a/tests/testthat/test-pca_truncated.R
+++ b/tests/testthat/test-pca_truncated.R
@@ -207,9 +207,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {
     rec <- prep(rec)
   )
   
-  expect_error(
-    bake(rec, new_data = cells),
-    NA
+  expect_no_error(
+    bake(rec, new_data = cells)
   )
 })
 
diff --git a/tests/testthat/test-umap.R b/tests/testthat/test-umap.R
index ed0bb52a..0428a235 100644
--- a/tests/testthat/test-umap.R
+++ b/tests/testthat/test-umap.R
@@ -344,9 +344,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {
     rec <- prep(rec)
   )
   
-  expect_error(
-    bake(rec, new_data = mtcars),
-    NA
+  expect_no_error(
+    bake(rec, new_data = mtcars)
   )
 })
 
diff --git a/tests/testthat/test-woe.R b/tests/testthat/test-woe.R
index badadcdd..eca8cd12 100644
--- a/tests/testthat/test-woe.R
+++ b/tests/testthat/test-woe.R
@@ -376,9 +376,8 @@ test_that("keep_original_cols - can prep recipes with it missing", {
     rec <- prep(rec)
   )
   
-  expect_error(
-    bake(rec, new_data = ames),
-    NA
+  expect_no_error(
+    bake(rec, new_data = ames)
   )
 })