Case weights
diff --git a/dev/search.json b/dev/search.json
index 4213f47..eee1d8e 100644
--- a/dev/search.json
+++ b/dev/search.json
@@ -1 +1 @@
-[{"path":[]},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to embed","title":"Contributing to embed","text":"outlines propose change embed. detailed info contributing , tidyverse packages, please see development contributing guide.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to embed","text":"can fix typos, spelling mistakes, grammatical errors documentation directly using GitHub web interface, long changes made source file. generally means ’ll need edit roxygen2 comments .R, .Rd file. can find .R file generates .Rd reading comment first line.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"bigger-changes","dir":"","previous_headings":"","what":"Bigger changes","title":"Contributing to embed","text":"want make bigger change, ’s good idea first file issue make sure someone team agrees ’s needed. ’ve found bug, please file issue illustrates bug minimal reprex (also help write unit test, needed).","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"Bigger changes","what":"Pull request process","title":"Contributing to embed","text":"Fork package clone onto computer. haven’t done , recommend using usethis::create_from_github(\"topepo/embed\", fork = TRUE). Install development dependences devtools::install_dev_deps(), make sure package passes R CMD check running devtools::check(). R CMD check doesn’t pass cleanly, ’s good idea ask help continuing. Create Git branch pull request (PR). recommend using usethis::pr_init(\"brief-description--change\"). Make changes, commit git, create PR running usethis::pr_push(), following prompts browser. title PR briefly describe change. body PR contain Fixes #issue-number. user-facing changes, add bullet top NEWS.md (.e. just first header). Follow style described https://style.tidyverse.org/news.html.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"code-style","dir":"","previous_headings":"Bigger changes","what":"Code style","title":"Contributing to embed","text":"New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat unit tests. Contributions test cases included easier accept.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to embed","text":"Please note embed project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://embed.tidymodels.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 embed authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://embed.tidymodels.org/dev/articles/Applications/GLM.html","id":"no-pooling","dir":"Articles > Applications","previous_headings":"","what":"No Pooling","title":"Using Generalized Linear Models","text":"case, effect sponsor code can estimated separately factor level. One method conducting estimation step fit logistic regression acceptance classification outcome sponsor code predictor. , log-odds naturally estimated logistic regression. data, recipe created step_lencode_glm used: tidy method can used extract encodings merged raw estimates: sponsor codes n > 1, estimates effectively : Note also effect used novel sponsor code future data sets average effect:","code":"grants_glm <- recipe(class ~ ., data = grants_other) %>% # specify the variable being encoded and the outcome step_lencode_glm(sponsor_code, outcome = vars(class)) %>% # estimate the effects prep(training = grants_other) glm_estimates <- tidy(grants_glm, number = 1) %>% dplyr::select(-terms, -id) glm_estimates ## # A tibble: 292 × 2 ## level value ##
## 1 100D 0.405 ## 2 101A -1.95 ## 3 103C -2.08 ## 4 105A -1.61 ## 5 107C 16.6 ## 6 10B 16.6 ## 7 111C -1.61 ## 8 112D 0.693 ## 9 113A 0 ## 10 118B 0 ## # ℹ 282 more rows glm_estimates <- glm_estimates %>% set_names(c(\"sponsor_code\", \"glm\")) %>% inner_join(props, by = \"sponsor_code\") glm_estimates %>% dplyr::filter(is.finite(log_odds)) %>% mutate(difference = log_odds - glm) %>% dplyr::select(difference) %>% summary() ## difference ## Min. :-1.332e-15 ## 1st Qu.:-2.220e-16 ## Median : 0.000e+00 ## Mean :-5.139e-17 ## 3rd Qu.: 1.604e-16 ## Max. : 8.882e-16 tidy(grants_glm, number = 1) %>% dplyr::filter(level == \"..new\") %>% select(-id) ## # A tibble: 1 × 3 ## level value terms ## ## 1 ..new -2.88 sponsor_code"},{"path":"https://embed.tidymodels.org/dev/articles/Applications/GLM.html","id":"partial-pooling","dir":"Articles > Applications","previous_headings":"","what":"Partial Pooling","title":"Using Generalized Linear Models","text":"method estimates effects using sponsor codes using hierarchical Bayesian generalized linear model. sponsor codes treated random set contributes random intercept previously used logistic regression. Partial pooling estimates effect combination separate empirical estimates log-odds prior distribution. sponsor codes small sample sizes, final estimate shrunken towards overall mean log-odds. makes sense since poor information estimating sponsor codes. sponsor codes many data points, estimates reply empirical estimates. page good discussion pooling using Bayesian models.","code":"# due to Matrix problems knitr::knit_exit()"},{"path":"https://embed.tidymodels.org/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Emil Hvitfeldt. Author, maintainer. Max Kuhn. Author. . Copyright holder, funder.","code":""},{"path":"https://embed.tidymodels.org/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Hvitfeldt E, Kuhn M (2024). embed: Extra Recipes Encoding Predictors. R package version 1.1.4.9000, https://github.com/tidymodels/embed, https://embed.tidymodels.org.","code":"@Manual{, title = {embed: Extra Recipes for Encoding Predictors}, author = {Emil Hvitfeldt and Max Kuhn}, year = {2024}, note = {R package version 1.1.4.9000, https://github.com/tidymodels/embed}, url = {https://embed.tidymodels.org}, }"},{"path":[]},{"path":"https://embed.tidymodels.org/dev/index.html","id":"introduction","dir":"","previous_headings":"","what":"Introduction","title":"Extra Recipes for Encoding Predictors","text":"embed extra steps recipes package embedding predictors one numeric columns. Almost preprocessing methods supervised. steps available separate package step dependencies, rstanarm, lme4, keras, fairly heavy. steps handle categorical predictors: step_lencode_glm(), step_lencode_bayes(), step_lencode_mixed() estimate effect factor levels outcome estimates used new encoding. estimates estimated generalized linear model. step can executed without pooling (via glm) partial pooling (stan_glm lmer). Currently implemented numeric two-class outcomes. step_embed() uses keras::layer_embedding translate original C factor levels set D new variables (< C). model fitting routine optimizes factor levels mapped new variables well corresponding regression coefficients (.e., neural network weights) used new encodings. step_woe() creates new variables based weight evidence encodings. step_feature_hash() can create indicator variables using feature hashing. numeric predictors: step_umap() uses nonlinear transformation similar t-SNE can used project transformation new data. supervised unsupervised methods can used. step_discretize_xgb() step_discretize_cart() can make binned versions numeric predictors using supervised tree-based models. step_pca_sparse() step_pca_sparse_bayes() conduct feature extraction sparsity component loadings. references methods : Francois C Allaire JJ (2018) Deep Learning R, Manning Guo, C Berkhahn F (2016) “Entity Embeddings Categorical Variables” Micci-Barreca D (2001) “preprocessing scheme high-cardinality categorical attributes classification prediction problems,” ACM SIGKDD Explorations Newsletter, 3(1), 27-32. Zumel N Mount J (2017) “vtreat: data.frame Processor Predictive Modeling” McInnes L Healy J (2018) UMAP: Uniform Manifold Approximation Projection Dimension Reduction Good, . J. (1985), “Weight evidence: brief survey”, Bayesian Statistics, 2, pp.249-270.","code":""},{"path":"https://embed.tidymodels.org/dev/index.html","id":"getting-started","dir":"","previous_headings":"","what":"Getting Started","title":"Extra Recipes for Encoding Predictors","text":"two articles walk use embedding steps, using generalized linear models neural networks built via TensorFlow.","code":""},{"path":"https://embed.tidymodels.org/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Extra Recipes for Encoding Predictors","text":"install package: Note use steps, also install packages rstanarm lme4. steps work, may want use: get bug fix use feature development version, can install development version package GitHub.","code":"install.packages(\"embed\") install.packages(c(\"rpart\", \"xgboost\", \"rstanarm\", \"lme4\")) # install.packages(\"pak\") pak::pak(\"tidymodels/embed\")"},{"path":"https://embed.tidymodels.org/dev/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Extra Recipes for Encoding Predictors","text":"project released Contributor Code Conduct. contributing project, agree abide terms. questions discussions tidymodels packages, modeling, machine learning, please post RStudio Community. think encountered bug, please submit issue. Either way, learn create share reprex (minimal, reproducible example), clearly communicate code. Check details contributing guidelines tidymodels packages get help.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":null,"dir":"Reference","previous_headings":"","what":"Add WoE in a data frame — add_woe","title":"Add WoE in a data frame — add_woe","text":"tidyverse friendly way plug WoE versions set predictor variables given binary outcome.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add WoE in a data frame — add_woe","text":"","code":"add_woe(.data, outcome, ..., dictionary = NULL, prefix = \"woe\")"},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add WoE in a data frame — add_woe","text":".data tbl. data.frame plug new woe version columns. outcome bare name outcome variable. ... Bare names predictor variables, passed pass variables dplyr::select(). means can use helpers like starts_with() matches(). dictionary tbl. NULL function build dictionary variables passed .... can pass custom dictionary , see dictionary() details. prefix character string prefix resulting new variables.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add WoE in a data frame — add_woe","text":"tibble original columns .data plus woe columns wanted.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Add WoE in a data frame — add_woe","text":"can pass custom dictionary add_woe(). must exactly structure output dictionary(). One easy way tweak output returned .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add WoE in a data frame — add_woe","text":"","code":"mtcars %>% add_woe(\"am\", cyl, gear:carb) #> # A tibble: 32 × 14 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> # ℹ 3 more variables: woe_cyl , woe_gear , woe_carb "},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":null,"dir":"Reference","previous_headings":"","what":"Weight of evidence dictionary — dictionary","title":"Weight of evidence dictionary — dictionary","text":"Builds woe dictionary set predictor variables upon given binary outcome. Convenient make woe version given set predictor variables also allow one tweak woe values hand.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Weight of evidence dictionary — dictionary","text":"","code":"dictionary(.data, outcome, ..., Laplace = 1e-06)"},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Weight of evidence dictionary — dictionary","text":".data tbl. data.frame variables come . outcome bare name outcome variable exactly 2 distinct values. ... bare names predictor variables selectors accepted dplyr::select(). Laplace Default 1e-6. pseudocount parameter Laplace Smoothing estimator. Value avoid -Inf/Inf predictor category one outcome class. Set 0 allow Inf/-Inf.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Weight of evidence dictionary — dictionary","text":"tibble summaries woe every given predictor variable stacked .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Weight of evidence dictionary — dictionary","text":"can pass custom dictionary step_woe(). must exactly structure output dictionary(). One easy way tweaking output returned .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Weight of evidence dictionary — dictionary","text":"Kullback, S. (1959). Information Theory Statistics. Wiley, New York. Hastie, T., Tibshirani, R. Friedman, J. (1986). Elements Statistical Learning, Second Edition, Springer, 2009. Good, . J. (1985), \"Weight evidence: brief survey\", Bayesian Statistics, 2, pp.249-270.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Weight of evidence dictionary — dictionary","text":"","code":"mtcars %>% dictionary(\"am\", cyl, gear:carb) #> # A tibble: 12 × 9 #> variable predictor n_tot n_0 n_1 p_0 p_1 woe outcome #> #> 1 cyl 4 11 3 8 0.158 0.615 1.36 am #> 2 cyl 6 7 4 3 0.211 0.231 0.0918 am #> 3 cyl 8 14 12 2 0.632 0.154 -1.41 am #> 4 gear 3 15 15 0 0.789 0 -16.1 am #> 5 gear 4 12 4 8 0.211 0.615 1.07 am #> 6 gear 5 5 0 5 0 0.385 15.8 am #> 7 carb 1 7 3 4 0.158 0.308 0.667 am #> 8 carb 2 10 6 4 0.316 0.308 -0.0260 am #> 9 carb 3 3 3 0 0.158 0 -14.5 am #> 10 carb 4 10 7 3 0.368 0.231 -0.468 am #> 11 carb 6 1 0 1 0 0.0769 14.2 am #> 12 carb 8 1 0 1 0 0.0769 14.2 am"},{"path":"https://embed.tidymodels.org/dev/reference/embed-package.html","id":null,"dir":"Reference","previous_headings":"","what":"embed: Extra Recipes for Encoding Predictors — embed-package","title":"embed: Extra Recipes for Encoding Predictors — embed-package","text":"Predictors can converted one numeric representations using variety methods. Effect encodings using simple generalized linear models arXiv:1611.09477 nonlinear models arXiv:1604.06737 can used. also functions dimension reduction approaches.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/embed-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"embed: Extra Recipes for Encoding Predictors — embed-package","text":"Maintainer: Emil Hvitfeldt emil.hvitfeldt@posit.co (ORCID) Authors: Max Kuhn max@posit.co (ORCID) contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://embed.tidymodels.org/dev/reference/reexports.html","id":null,"dir":"Reference","previous_headings":"","what":"Objects exported from other packages — reexports","title":"Objects exported from other packages — reexports","text":"objects imported packages. Follow links see documentation. generics required_pkgs, tidy, tunable","code":""},{"path":"https://embed.tidymodels.org/dev/reference/required_pkgs.embed.html","id":null,"dir":"Reference","previous_headings":"","what":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","title":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","text":"Recipe-adjacent packages always list required package steps can function properly within parallel processing schemes.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/required_pkgs.embed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","text":"","code":"# S3 method for class 'step_collapse_cart' required_pkgs(x, ...) # S3 method for class 'step_collapse_stringdist' required_pkgs(x, ...) # S3 method for class 'step_discretize_cart' required_pkgs(x, ...) # S3 method for class 'step_discretize_xgb' required_pkgs(x, ...) # S3 method for class 'step_embed' required_pkgs(x, ...) # S3 method for class 'step_feature_hash' required_pkgs(x, ...) # S3 method for class 'step_lencode_bayes' required_pkgs(x, ...) # S3 method for class 'step_lencode_glm' required_pkgs(x, ...) # S3 method for class 'step_lencode_mixed' required_pkgs(x, ...) # S3 method for class 'step_pca_sparse' required_pkgs(x, ...) # S3 method for class 'step_pca_sparse_bayes' required_pkgs(x, ...) # S3 method for class 'step_pca_truncated' required_pkgs(x, ...) # S3 method for class 'step_umap' required_pkgs(x, ...) # S3 method for class 'step_woe' required_pkgs(x, ...)"},{"path":"https://embed.tidymodels.org/dev/reference/required_pkgs.embed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","text":"x recipe step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/required_pkgs.embed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","text":"character vector","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":null,"dir":"Reference","previous_headings":"","what":"Compound solubility data — solubility","title":"Compound solubility data — solubility","text":"Compound solubility data","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Compound solubility data — solubility","text":"Tetko, ., Tanchuk, V., Kasheva, T., Villa, . (2001). Estimation aqueous solubility chemical compounds using E-state indices. Journal Chemical Information Computer Sciences, 41(6), 1488-1493. Huuskonen, J. (2000). Estimation aqueous solubility diverse set organic compounds based molecular topology. Journal Chemical Information Computer Sciences, 40(3), 773-777.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compound solubility data — solubility","text":"solubility data frame","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Compound solubility data — solubility","text":"Tetko et al. (2001) Huuskonen (2000) investigated set compounds corresponding experimental solubility values using complex sets descriptors. used linear regression neural network models estimate relationship chemical structure solubility. analyses, use 1267 compounds set understandable descriptors fall one three groups: 208 binary \"fingerprints\" indicate presence absence particular chemical sub-structure, 16 count descriptors (number bonds number Bromine atoms) 4 continuous descriptors (molecular weight surface area).","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compound solubility data — solubility","text":"","code":"data(solubility) str(solubility) #> tibble [1,267 × 229] (S3: tbl_df/tbl/data.frame) #> $ fp_001 : int [1:1267] 0 0 1 0 0 1 0 1 1 1 ... #> $ fp_002 : int [1:1267] 1 1 1 0 0 0 1 0 0 1 ... #> $ fp_003 : int [1:1267] 0 0 1 1 1 1 0 1 1 1 ... #> $ fp_004 : int [1:1267] 0 1 1 0 1 1 1 1 1 1 ... #> $ fp_005 : int [1:1267] 1 1 1 0 1 0 1 0 0 1 ... #> $ fp_006 : int [1:1267] 0 1 0 0 1 0 0 0 1 1 ... #> $ fp_007 : int [1:1267] 0 1 0 1 0 0 0 1 1 1 ... #> $ fp_008 : int [1:1267] 1 1 1 0 0 0 1 0 0 0 ... #> $ fp_009 : int [1:1267] 0 0 0 0 1 1 1 0 1 0 ... #> $ fp_010 : int [1:1267] 0 0 1 0 0 0 0 0 0 0 ... #> $ fp_011 : int [1:1267] 0 1 0 0 0 0 0 0 1 0 ... #> $ fp_012 : int [1:1267] 0 0 0 0 0 1 0 1 0 0 ... #> $ fp_013 : int [1:1267] 0 0 0 0 1 0 1 0 0 0 ... #> $ fp_014 : int [1:1267] 0 0 0 0 0 0 1 0 0 0 ... #> $ fp_015 : int [1:1267] 1 1 1 1 1 1 1 1 1 1 ... #> $ fp_016 : int [1:1267] 0 1 0 0 1 1 0 1 0 0 ... #> $ fp_017 : int [1:1267] 0 0 1 1 0 0 0 0 1 1 ... #> $ fp_018 : int [1:1267] 0 1 0 0 0 0 0 0 0 0 ... #> $ fp_019 : int [1:1267] 1 0 0 0 1 0 1 0 0 0 ... #> $ fp_020 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_021 : int [1:1267] 0 0 0 0 0 1 0 0 1 0 ... #> $ fp_022 : int [1:1267] 0 0 0 0 0 0 0 0 0 1 ... #> $ fp_023 : int [1:1267] 0 0 0 1 0 0 0 0 1 0 ... #> $ fp_024 : int [1:1267] 1 0 0 0 1 0 0 0 0 0 ... #> $ fp_025 : int [1:1267] 0 0 1 0 0 0 0 0 0 0 ... #> $ fp_026 : int [1:1267] 1 0 0 0 0 0 1 0 0 0 ... #> $ fp_027 : int [1:1267] 0 0 0 0 0 0 0 0 0 1 ... #> $ fp_028 : int [1:1267] 0 1 0 0 0 0 0 0 1 1 ... #> $ fp_029 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_030 : int [1:1267] 0 0 0 0 1 0 0 0 0 0 ... #> $ fp_031 : int [1:1267] 0 0 0 0 0 0 0 1 0 0 ... #> $ fp_032 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_033 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_034 : int [1:1267] 0 0 0 0 1 0 0 0 0 1 ... #> $ fp_035 : int [1:1267] 0 0 0 0 0 0 0 0 1 0 ... #> $ fp_036 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_037 : int [1:1267] 0 0 0 0 0 0 0 0 1 0 ... #> $ fp_038 : int [1:1267] 0 0 1 0 0 0 0 0 0 0 ... #> $ fp_039 : int [1:1267] 1 0 0 0 0 0 0 0 0 0 ... #> $ fp_040 : int [1:1267] 1 0 0 0 0 0 0 0 0 0 ... #> $ fp_041 : int [1:1267] 0 0 0 1 0 0 0 0 1 0 ... #> $ fp_042 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_043 : int [1:1267] 0 1 0 0 0 0 0 0 0 0 ... #> $ fp_044 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_045 : int [1:1267] 0 0 1 0 0 0 0 0 0 0 ... #> $ fp_046 : int [1:1267] 0 1 0 0 0 0 1 0 0 1 ... #> $ fp_047 : int [1:1267] 0 1 1 0 0 0 1 0 0 0 ... #> $ fp_048 : int [1:1267] 0 0 0 0 0 0 0 1 0 0 ... #> $ fp_049 : int [1:1267] 0 0 0 0 0 0 1 0 0 0 ... #> $ fp_050 : int [1:1267] 0 0 0 0 0 0 0 1 0 1 ... #> $ fp_051 : int [1:1267] 0 1 0 0 0 0 0 0 0 0 ... #> $ fp_052 : int [1:1267] 0 0 0 0 0 0 0 0 0 1 ... #> $ fp_053 : int [1:1267] 0 0 0 0 0 0 1 0 0 0 ... #> $ fp_054 : int [1:1267] 0 0 0 1 0 0 0 0 1 1 ... #> $ fp_055 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_056 : int [1:1267] 1 0 0 0 0 0 0 0 0 0 ... #> $ fp_057 : int [1:1267] 0 0 0 0 0 0 1 0 0 0 ... #> $ fp_058 : int [1:1267] 0 0 0 0 0 0 0 0 0 1 ... #> $ fp_059 : int [1:1267] 0 0 0 0 0 0 0 1 0 0 ... #> $ fp_060 : int [1:1267] 0 1 1 0 0 0 0 1 1 0 ... #> $ fp_061 : int [1:1267] 0 0 1 0 0 0 0 1 1 0 ... #> $ fp_062 : int [1:1267] 0 0 1 0 0 1 0 1 1 1 ... #> $ fp_063 : int [1:1267] 1 1 0 0 1 1 1 0 0 1 ... #> $ fp_064 : int [1:1267] 0 1 1 0 1 1 0 1 0 0 ... #> $ fp_065 : int [1:1267] 1 1 0 0 1 0 1 0 1 1 ... #> $ fp_066 : int [1:1267] 1 0 1 1 1 1 1 1 1 1 ... #> $ fp_067 : int [1:1267] 1 1 0 0 1 1 1 0 0 1 ... #> $ fp_068 : int [1:1267] 0 1 0 0 1 1 1 0 0 1 ... #> $ fp_069 : int [1:1267] 1 0 1 1 1 1 0 1 1 0 ... #> $ fp_070 : int [1:1267] 1 1 0 1 0 0 1 0 1 0 ... #> $ fp_071 : int [1:1267] 0 0 0 0 0 0 1 0 1 1 ... #> $ fp_072 : int [1:1267] 0 1 1 0 0 1 0 1 1 1 ... #> $ fp_073 : int [1:1267] 0 1 1 0 0 0 0 0 1 0 ... #> $ fp_074 : int [1:1267] 0 1 0 0 0 0 0 0 1 0 ... #> $ fp_075 : int [1:1267] 0 1 0 0 1 1 1 0 0 1 ... #> $ fp_076 : int [1:1267] 1 1 0 0 0 0 1 0 1 1 ... #> $ fp_077 : int [1:1267] 0 1 0 1 0 0 0 1 1 1 ... #> $ fp_078 : int [1:1267] 0 1 0 0 0 0 0 0 1 0 ... #> $ fp_079 : int [1:1267] 1 1 1 1 1 0 1 0 1 1 ... #> $ fp_080 : int [1:1267] 0 1 0 0 1 1 1 1 0 0 ... #> $ fp_081 : int [1:1267] 0 0 1 1 0 0 0 1 1 1 ... #> $ fp_082 : int [1:1267] 1 1 1 0 1 1 1 0 1 1 ... #> $ fp_083 : int [1:1267] 0 0 0 0 1 0 0 0 0 1 ... #> $ fp_084 : int [1:1267] 1 1 0 0 1 0 1 0 0 0 ... #> $ fp_085 : int [1:1267] 0 1 0 0 0 0 1 0 0 0 ... #> $ fp_086 : int [1:1267] 0 0 0 1 1 0 0 1 1 1 ... #> $ fp_087 : int [1:1267] 1 1 1 1 1 0 1 0 1 1 ... #> $ fp_088 : int [1:1267] 0 1 0 0 0 0 0 1 1 0 ... #> $ fp_089 : int [1:1267] 1 1 0 0 0 0 1 0 0 0 ... #> $ fp_090 : int [1:1267] 0 1 0 1 0 0 0 1 1 1 ... #> $ fp_091 : int [1:1267] 1 1 0 0 1 0 1 0 0 1 ... #> $ fp_092 : int [1:1267] 0 0 0 0 1 1 1 0 1 0 ... #> $ fp_093 : int [1:1267] 0 1 0 1 0 0 0 1 1 1 ... #> $ fp_094 : int [1:1267] 0 0 0 0 1 0 0 1 0 0 ... #> $ fp_095 : int [1:1267] 0 0 0 0 0 0 0 0 1 1 ... #> $ fp_096 : int [1:1267] 0 0 0 0 0 0 0 0 1 0 ... #> $ fp_097 : int [1:1267] 1 1 0 0 0 0 1 0 1 0 ... #> $ fp_098 : int [1:1267] 0 0 1 0 0 0 0 1 0 0 ... #> $ fp_099 : int [1:1267] 0 0 0 0 0 0 0 0 1 0 ... #> [list output truncated]"},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised Collapsing of Factor Levels — step_collapse_cart","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"step_collapse_cart() creates specification recipe step can collapse factor levels smaller set using supervised tree.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"","code":"step_collapse_cart( recipe, ..., role = NA, trained = FALSE, outcome = NULL, cost_complexity = 1e-04, min_n = 5, results = NULL, skip = FALSE, id = rand_id(\"step_collapse_cart\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables affected step. See selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome train CART models order pool factor levels. cost_complexity non-negative value regulates complexity tree pruning occurs. Values near 0.1 usually correspond tree single splits. Values zero correspond unpruned tree. min_n integer many data points required make splits tree growing process. Larger values correspond less complex trees. results list results convert new factor levels. skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"updated recipe step.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"step uses CART tree (classification regression) group existing factor levels potentially smaller set. changes levels factor predictor (tidy() method can used understand translation). different ways step able collapse levels. model fails , results level split, original factor levels retained. also cases \"admissible split\" means model find signal data.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"tidy() step, tibble retruned columns terms, old, new, id: terms character, selectors variables selected old character, old levels new character, new levels id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"","code":"data(ames, package = \"modeldata\") ames$Sale_Price <- log10(ames$Sale_Price) rec <- recipe(Sale_Price ~ ., data = ames) %>% step_collapse_cart( Sale_Type, Garage_Type, Neighborhood, outcome = vars(Sale_Price) ) %>% prep() tidy(rec, number = 1) #> # A tibble: 45 × 4 #> terms old new id #> #> 1 Sale_Type \"ConLD\" Sale_Type_1 step_collapse_cart_SwlKL #> 2 Sale_Type \"ConLw\" Sale_Type_1 step_collapse_cart_SwlKL #> 3 Sale_Type \"Oth\" Sale_Type_1 step_collapse_cart_SwlKL #> 4 Sale_Type \"COD\" Sale_Type_2 step_collapse_cart_SwlKL #> 5 Sale_Type \"VWD\" Sale_Type_2 step_collapse_cart_SwlKL #> 6 Sale_Type \"ConLI\" Sale_Type_3 step_collapse_cart_SwlKL #> 7 Sale_Type \"WD \" Sale_Type_4 step_collapse_cart_SwlKL #> 8 Sale_Type \"CWD\" Sale_Type_5 step_collapse_cart_SwlKL #> 9 Sale_Type \"Con\" Sale_Type_6 step_collapse_cart_SwlKL #> 10 Sale_Type \"New\" Sale_Type_7 step_collapse_cart_SwlKL #> # ℹ 35 more rows"},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":null,"dir":"Reference","previous_headings":"","what":"collapse factor levels using stringdist — step_collapse_stringdist","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"step_collapse_stringdist() creates specification recipe step collapse factor levels low stringdist .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"","code":"step_collapse_stringdist( recipe, ..., role = NA, trained = FALSE, distance = NULL, method = \"osa\", options = list(), results = NULL, columns = NULL, skip = FALSE, id = rand_id(\"collapse_stringdist\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables affected step. See selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. distance Integer, value determine strings collapsed . value used inclusive, 2 collapse levels string distance 2 lower. method Character, method distance calculation. default \"osa\", see stringdist::stringdist-metrics. options List, arguments passed stringdist::stringdistmatrix() weight, q, p, bt, used different values method. results list denoting way labels collapses stored preprocessing step trained prep(). columns character string variable names populated (eventually) terms argument. skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (columns affected) base.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"tidy() step, tibble retruned columns terms, , , id: terms character, selectors variables selected character, old levels character, new levels id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"","code":"library(recipes) library(tibble) data0 <- tibble( x1 = c(\"a\", \"b\", \"d\", \"e\", \"sfgsfgsd\", \"hjhgfgjgr\"), x2 = c(\"ak\", \"b\", \"djj\", \"e\", \"hjhgfgjgr\", \"hjhgfgjgr\") ) rec <- recipe(~., data = data0) %>% step_collapse_stringdist(all_predictors(), distance = 1) %>% prep() rec %>% bake(new_data = NULL) #> # A tibble: 6 × 2 #> x1 x2 #> #> 1 a ak #> 2 a b #> 3 a djj #> 4 a b #> 5 sfgsfgsd hjhgfgjgr #> 6 hjhgfgjgr hjhgfgjgr tidy(rec, 1) #> # A tibble: 11 × 4 #> terms from to id #> #> 1 x1 a a collapse_stringdist_qIMPV #> 2 x1 b a collapse_stringdist_qIMPV #> 3 x1 d a collapse_stringdist_qIMPV #> 4 x1 e a collapse_stringdist_qIMPV #> 5 x1 hjhgfgjgr hjhgfgjgr collapse_stringdist_qIMPV #> 6 x1 sfgsfgsd sfgsfgsd collapse_stringdist_qIMPV #> 7 x2 ak ak collapse_stringdist_qIMPV #> 8 x2 b b collapse_stringdist_qIMPV #> 9 x2 e b collapse_stringdist_qIMPV #> 10 x2 djj djj collapse_stringdist_qIMPV #> 11 x2 hjhgfgjgr hjhgfgjgr collapse_stringdist_qIMPV rec <- recipe(~., data = data0) %>% step_collapse_stringdist(all_predictors(), distance = 2) %>% prep() rec %>% bake(new_data = NULL) #> # A tibble: 6 × 2 #> x1 x2 #> #> 1 a ak #> 2 a ak #> 3 a djj #> 4 a ak #> 5 sfgsfgsd hjhgfgjgr #> 6 hjhgfgjgr hjhgfgjgr tidy(rec, 1) #> # A tibble: 11 × 4 #> terms from to id #> #> 1 x1 a a collapse_stringdist_fLSSY #> 2 x1 b a collapse_stringdist_fLSSY #> 3 x1 d a collapse_stringdist_fLSSY #> 4 x1 e a collapse_stringdist_fLSSY #> 5 x1 hjhgfgjgr hjhgfgjgr collapse_stringdist_fLSSY #> 6 x1 sfgsfgsd sfgsfgsd collapse_stringdist_fLSSY #> 7 x2 ak ak collapse_stringdist_fLSSY #> 8 x2 b ak collapse_stringdist_fLSSY #> 9 x2 e ak collapse_stringdist_fLSSY #> 10 x2 djj djj collapse_stringdist_fLSSY #> 11 x2 hjhgfgjgr hjhgfgjgr collapse_stringdist_fLSSY"},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":null,"dir":"Reference","previous_headings":"","what":"Discretize numeric variables with CART — step_discretize_cart","title":"Discretize numeric variables with CART — step_discretize_cart","text":"step_discretize_cart() creates specification recipe step discretize numeric data (e.g. integers doubles) bins supervised way using CART model.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Discretize numeric variables with CART — step_discretize_cart","text":"","code":"step_discretize_cart( recipe, ..., role = NA, trained = FALSE, outcome = NULL, cost_complexity = 0.01, tree_depth = 10, min_n = 20, rules = NULL, skip = FALSE, id = rand_id(\"discretize_cart\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Discretize numeric variables with CART — step_discretize_cart","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables affected step. See selections() details. role Defaults \"predictor\". trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome train CART models order discretize explanatory variables. cost_complexity regularization parameter. split decrease overall lack fit factor cost_complexity attempted. Corresponds cp rpart::rpart(). Defaults 0.01. tree_depth maximum depth final tree. Corresponds maxdepth rpart::rpart(). Defaults 10. min_n number data points node required continue splitting. Corresponds minsplit rpart::rpart(). Defaults 20. rules splitting rules best CART tree retain variable. length zero, splitting used column. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Discretize numeric variables with CART — step_discretize_cart","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Discretize numeric variables with CART — step_discretize_cart","text":"step_discretize_cart() creates non-uniform bins numerical variables utilizing information outcome variable applying CART model. best selection buckets variable selected using standard cost-complexity pruning CART, makes discretization method resistant overfitting. step requires rpart package. installed, step stop note installing package. Note original data replaced new bins.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Discretize numeric variables with CART — step_discretize_cart","text":"tidy() step, tibble retruned columns terms, value, id: terms character, selectors variables selected value numeric, location splits id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Discretize numeric variables with CART — step_discretize_cart","text":"step 3 tuning parameters: cost_complexity: Cost-Complexity Parameter (type: double, default: 0.01) tree_depth: Tree Depth (type: integer, default: 10) min_n: Minimal Node Size (type: integer, default: 20)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Discretize numeric variables with CART — step_discretize_cart","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Discretize numeric variables with CART — step_discretize_cart","text":"","code":"library(modeldata) data(ad_data) library(rsample) split <- initial_split(ad_data, strata = \"Class\") ad_data_tr <- training(split) ad_data_te <- testing(split) cart_rec <- recipe(Class ~ ., data = ad_data_tr) %>% step_discretize_cart( tau, age, p_tau, Ab_42, outcome = \"Class\", id = \"cart splits\" ) cart_rec <- prep(cart_rec, training = ad_data_tr) # The splits: tidy(cart_rec, id = \"cart splits\") #> # A tibble: 16 × 3 #> terms value id #> #> 1 tau 5.74 cart splits #> 2 tau 5.79 cart splits #> 3 tau 5.89 cart splits #> 4 tau 6.00 cart splits #> 5 tau 6.15 cart splits #> 6 tau 6.25 cart splits #> 7 tau 6.30 cart splits #> 8 tau 6.42 cart splits #> 9 tau 6.66 cart splits #> 10 age 0.986 cart splits #> 11 age 0.987 cart splits #> 12 p_tau 3.90 cart splits #> 13 p_tau 4.62 cart splits #> 14 Ab_42 10.6 cart splits #> 15 Ab_42 11.2 cart splits #> 16 Ab_42 11.3 cart splits bake(cart_rec, ad_data_te, tau) #> # A tibble: 84 × 1 #> tau #> #> 1 [-Inf,5.744) #> 2 [6.147,6.249) #> 3 [6.664, Inf] #> 4 [5.995,6.147) #> 5 [-Inf,5.744) #> 6 [-Inf,5.744) #> 7 [6.422,6.664) #> 8 [6.147,6.249) #> 9 [6.304,6.422) #> 10 [-Inf,5.744) #> # ℹ 74 more rows"},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":null,"dir":"Reference","previous_headings":"","what":"Discretize numeric variables with XgBoost — step_discretize_xgb","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"step_discretize_xgb() creates specification recipe step discretize numeric data (e.g. integers doubles) bins supervised way using XgBoost model.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"","code":"step_discretize_xgb( recipe, ..., role = NA, trained = FALSE, outcome = NULL, sample_val = 0.2, learn_rate = 0.3, num_breaks = 10, tree_depth = 1, min_n = 5, rules = NULL, skip = FALSE, id = rand_id(\"discretize_xgb\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables affected step. See selections() details. role Defaults \"predictor\". trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome train XgBoost models order discretize explanatory variables. sample_val Share data used validation (early stopping) learned splits (rest used training). Defaults 0.20. learn_rate rate boosting algorithm adapts iteration--iteration. Corresponds eta xgboost package. Defaults 0.3. num_breaks maximum number discrete bins bucket continuous features. Corresponds max_bin xgboost package. Defaults 10. tree_depth maximum depth tree (.e. number splits). Corresponds max_depth xgboost package. Defaults 1. min_n minimum number instances needed node. Corresponds min_child_weight xgboost package. Defaults 5. rules splitting rules best XgBoost tree retain variable. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"step_discretize_xgb() creates non-uniform bins numerical variables utilizing information outcome variable applying xgboost model. advised impute missing values step. step intended used particularly linear models thanks creating non-uniform bins becomes easier learn non-linear patterns data. best selection buckets variable selected using internal early stopping scheme implemented xgboost package, makes discretization method prone overfitting. pre-defined values underlying xgboost learns good reasonably complex results. However, one wishes tune recommended path first start changing value num_breaks e.g.: 20 30. give satisfactory results one experiment modifying tree_depth min_n parameters. Note recommended tune learn_rate simultaneously parameters. step requires xgboost package. installed, step stop note installing package. Note original data replaced new bins.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"tidy() step, tibble retruned columns terms, value, id: terms character, selectors variables selected value numeric, location splits id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"step 5 tuning parameters: sample_val: Proportion data validation (type: double, default: 0.2) learn_rate: Learning Rate (type: double, default: 0.3) num_breaks: Number Cut Points (type: integer, default: 10) tree_depth: Tree Depth (type: integer, default: 1) min_n: Minimal Node Size (type: integer, default: 5)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"","code":"library(rsample) library(recipes) data(credit_data, package = \"modeldata\") set.seed(1234) split <- initial_split(credit_data[1:1000, ], strata = \"Status\") credit_data_tr <- training(split) credit_data_te <- testing(split) xgb_rec <- recipe(Status ~ Income + Assets, data = credit_data_tr) %>% step_impute_median(Income, Assets) %>% step_discretize_xgb(Income, Assets, outcome = \"Status\") xgb_rec <- prep(xgb_rec, training = credit_data_tr) bake(xgb_rec, credit_data_te, Assets) #> # A tibble: 251 × 1 #> Assets #> #> 1 [3000,4000) #> 2 [3000,4000) #> 3 [9500, Inf] #> 4 [3000,4000) #> 5 [-Inf,2500) #> 6 [-Inf,2500) #> 7 [-Inf,2500) #> 8 [4000,4500) #> 9 [-Inf,2500) #> 10 [3000,4000) #> # ℹ 241 more rows"},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":null,"dir":"Reference","previous_headings":"","what":"Encoding Factors into Multiple Columns — step_embed","title":"Encoding Factors into Multiple Columns — step_embed","text":"step_embed() creates specification recipe step convert nominal (.e. factor) predictor set scores derived tensorflow model via word-embedding model. embed_control simple wrapper setting default options.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Encoding Factors into Multiple Columns — step_embed","text":"","code":"step_embed( recipe, ..., role = \"predictor\", trained = FALSE, outcome = NULL, predictors = NULL, num_terms = 2, hidden_units = 0, options = embed_control(), mapping = NULL, history = NULL, keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"embed\") ) embed_control( loss = \"mse\", metrics = NULL, optimizer = \"sgd\", epochs = 20, validation_split = 0, batch_size = 32, verbose = 0, callbacks = NULL )"},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Encoding Factors into Multiple Columns — step_embed","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables. step_embed, indicates variables encoded numeric format. See recipes::selections() details. tidy method, currently used. role model terms created step, analysis role assigned?. default, function assumes embedding variables created used predictors model. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome neural network. predictors optional call vars specify variables added additional predictors neural network. variables numeric perhaps centered scaled. num_terms integer number resulting variables. hidden_units integer number hidden units dense ReLu layer embedding output later. Use value zero intermediate layer (see Details ). options list options model fitting process. mapping list tibble results define encoding. NULL step trained recipes::prep(). history tibble convergence statistics term. NULL step trained recipes::prep(). keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify . optimizer, loss, metrics Arguments pass keras::compile() epochs, validation_split, batch_size, verbose, callbacks Arguments pass keras::fit()","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Encoding Factors into Multiple Columns — step_embed","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables encoding), level (factor levels), several columns containing embed name.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Encoding Factors into Multiple Columns — step_embed","text":"Factor levels initially assigned random new variables variables used neural network optimize allocation levels new columns well estimating model predict outcome. See Section 6.1.2 Francois Allaire (2018) details. new variables mapped specific levels seen time model training extra instance variables used new levels factor. One model created call step_embed. terms given step estimated encoded model also contain predictors give predictors (). outcome numeric, linear activation function used last layer softmax used factor outcomes (number levels). example, keras code numeric outcome, one categorical predictor, hidden units used factor outcome used hidden units requested, code variables specified predictors added additional dense layer layer_flatten hidden layer. Also note may difficult obtain reproducible results using step due nature Tensorflow (see link References). tensorflow models run parallel within session (via foreach futures) parallel package. using recipes step caret, avoid parallel processing.","code":"keras_model_sequential() %>% layer_embedding( input_dim = num_factor_levels_x + 1, output_dim = num_terms, input_length = 1 ) %>% layer_flatten() %>% layer_dense(units = 1, activation = 'linear') keras_model_sequential() %>% layer_embedding( input_dim = num_factor_levels_x + 1, output_dim = num_terms, input_length = 1 ) %>% layer_flatten() %>% layer_dense(units = hidden_units, activation = \"relu\") %>% layer_dense(units = num_factor_levels_y, activation = 'softmax')"},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Encoding Factors into Multiple Columns — step_embed","text":"tidy() step, tibble retruned number columns embedding information, columns terms, levels, id: terms character, selectors variables selected levels character, levels variable id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Encoding Factors into Multiple Columns — step_embed","text":"step 2 tuning parameters: num_terms: # Model Terms (type: integer, default: 2) hidden_units: # Hidden Units (type: integer, default: 0)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Encoding Factors into Multiple Columns — step_embed","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Encoding Factors into Multiple Columns — step_embed","text":"Francois C Allaire JJ (2018) Deep Learning R, Manning \"Concatenate Embeddings Categorical Variables Keras\" https://flovv.github.io/Embeddings_with_keras_part2/","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Encoding Factors into Multiple Columns — step_embed","text":"","code":"data(grants, package = \"modeldata\") set.seed(1) grants_other <- sample_n(grants_other, 500) rec <- recipe(class ~ num_ci + sponsor_code, data = grants_other) %>% step_embed(sponsor_code, outcome = vars(class), options = embed_control(epochs = 10) )"},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":null,"dir":"Reference","previous_headings":"","what":"Dummy Variables Creation via Feature Hashing — step_feature_hash","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"step_feature_hash() deprecated favor textrecipes::step_dummy_hash(). function creates specification recipe step convert nominal data (e.g. character factors) one numeric binary columns using levels original data.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"","code":"step_feature_hash( recipe, ..., role = \"predictor\", trained = FALSE, num_hash = 2^6, preserve = deprecated(), columns = NULL, keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"feature_hash\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables step. See selections() details. role model terms created step, analysis role assigned? default, new columns created step original variables used predictors model. trained logical indicate quantities preprocessing estimated. num_hash number resulting dummy variable columns. preserve Use keep_original_cols instead specify whether selected column(s) retained addition new dummy variables. columns character vector selected columns. NULL step trained recipes::prep(). keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"step_feature_hash() create set binary dummy variables factor character variable. values used determine row dummy variable assigned (opposed specific column value map ). Since method rely pre-determined assignment levels columns, new factor levels can added selected columns without issue. Missing values result missing values hashed columns. Note assignment levels hashing columns try maximize allocation. likely multiple levels column map hashed columns (even small data sets). Similarly, likely columns zeros. zero-variance filter (via recipes::step_zv()) recommended recipe uses hashed columns.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"tidy() step, tibble retruned columns terms id: terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"Weinberger, K, Dasgupta, J Langford, Smola, J Attenberg. 2009. \"Feature Hashing Large Scale Multitask Learning.\" Proceedings 26th Annual International Conference Machine Learning, 1113–20. ACM. Kuhn Johnson (2020) Feature Engineering Selection: Practical Approach Predictive Models. CRC/Chapman Hall https://bookdown.org/max/FES/encoding-predictors--many-categories.html","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"","code":"data(grants, package = \"modeldata\") rec <- recipe(class ~ sponsor_code, data = grants_other) %>% step_feature_hash( sponsor_code, num_hash = 2^6, keep_original_cols = TRUE ) %>% prep() #> Warning: `step_feature_hash()` was deprecated in embed 0.2.0. #> ℹ Please use `textrecipes::step_dummy_hash()` instead. # How many of the 298 locations ended up in each hash column? results <- bake(rec, new_data = NULL, starts_with(\"sponsor_code\")) %>% distinct() apply(results %>% select(-sponsor_code), 2, sum) %>% table() #> . #> 0 1 2 3 4 5 6 7 8 9 10 11 #> 3 6 6 7 12 8 7 6 4 3 1 1"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"step_lencode_bayes() creates specification recipe step convert nominal (.e. factor) predictor single set scores derived generalized linear model estimated using Bayesian analysis.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"","code":"step_lencode_bayes( recipe, ..., role = NA, trained = FALSE, outcome = NULL, options = list(seed = sample.int(10^5, 1)), verbose = FALSE, mapping = NULL, skip = FALSE, id = rand_id(\"lencode_bayes\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables. step_lencode_bayes, indicates variables encoded numeric format. See recipes::selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome generalized linear model. numeric two-level factors currently supported. options list options pass rstanarm::stan_glmer(). verbose logical control default printing rstanarm::stan_glmer(). mapping list tibble results define encoding. NULL step trained recipes::prep(). skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables encoding), level (factor levels), value (encodings).","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"factor predictor, generalized linear model fit outcome coefficients returned encoding. coefficients linear predictor scale , factor outcomes, log-odds units. coefficients created using intercept model , two factor outcomes used, log-odds reflect event interest first level factor. novel levels, slightly timmed average coefficients returned. hierarchical generalized linear model fit using rstanarm::stan_glmer() intercept via ... include family argument (automatically set step, unless passed options) well arguments given options argument step. Relevant options include chains, iter, cores, arguments priors (see links References ). prior_intercept argument effect amount shrinkage.","code":"stan_glmer(outcome ~ (1 | predictor), data = data, ...)"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"tidy() step, tibble retruned columns level, value, terms, id: level character, factor levels value numeric, encoding terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"Micci-Barreca D (2001) \"preprocessing scheme high-cardinality categorical attributes classification prediction problems,\" ACM SIGKDD Explorations Newsletter, 3(1), 27-32. Zumel N Mount J (2017) \"vtreat: data.frame Processor Predictive Modeling,\" arXiv:1611.09477 \"Hierarchical Partial Pooling Repeated Binary Trials\" https://CRAN.R-project.org/package=rstanarm/vignettes/pooling.html \"Prior Distributions rstanarm Models\" http://mc-stan.org/rstanarm/reference/priors.html \"Estimating Generalized (Non-)Linear Models Group-Specific Terms rstanarm\" http://mc-stan.org/rstanarm/articles/glmer.html","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"","code":"library(recipes) library(dplyr) library(modeldata) data(grants) set.seed(1) grants_other <- sample_n(grants_other, 500) # \\donttest{ reencoded <- recipe(class ~ sponsor_code, data = grants_other) %>% step_lencode_bayes(sponsor_code, outcome = vars(class)) # }"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"step_lencode_glm() creates specification recipe step convert nominal (.e. factor) predictor single set scores derived generalized linear model.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"","code":"step_lencode_glm( recipe, ..., role = NA, trained = FALSE, outcome = NULL, mapping = NULL, skip = FALSE, id = rand_id(\"lencode_glm\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables. step_lencode_glm, indicates variables encoded numeric format. See recipes::selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome generalized linear model. numeric two-level factors currently supported. mapping list tibble results define encoding. NULL step trained recipes::prep(). skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables encoding), level (factor levels), value (encodings).","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"factor predictor, generalized linear model fit outcome coefficients returned encoding. coefficients linear predictor scale , factor outcomes, log-odds units. coefficients created using intercept model , two factor outcomes used, log-odds reflect event interest first level factor. novel levels, slightly timmed average coefficients returned.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"tidy() step, tibble retruned columns level, value, terms, id: level character, factor levels value numeric, encoding terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"Micci-Barreca D (2001) \"preprocessing scheme high-cardinality categorical attributes classification prediction problems,\" ACM SIGKDD Explorations Newsletter, 3(1), 27-32. Zumel N Mount J (2017) \"vtreat: data.frame Processor Predictive Modeling,\" arXiv:1611.09477","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"","code":"library(recipes) library(dplyr) library(modeldata) data(grants) set.seed(1) grants_other <- sample_n(grants_other, 500) # \\donttest{ reencoded <- recipe(class ~ sponsor_code, data = grants_other) %>% step_lencode_glm(sponsor_code, outcome = vars(class)) # }"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"step_lencode_mixed() creates specification recipe step convert nominal (.e. factor) predictor single set scores derived generalized linear mixed model.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"","code":"step_lencode_mixed( recipe, ..., role = NA, trained = FALSE, outcome = NULL, options = list(verbose = 0), mapping = NULL, skip = FALSE, id = rand_id(\"lencode_mixed\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables. step_lencode_mixed, indicates variables encoded numeric format. See recipes::selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome generalized linear model. numeric two-level factors currently supported. options list options pass lme4::lmer() lme4::glmer(). mapping list tibble results define encoding. NULL step trained recipes::prep(). skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables encoding), level (factor levels), value (encodings).","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"factor predictor, generalized linear model fit outcome coefficients returned encoding. coefficients linear predictor scale , factor outcomes, log-odds units. coefficients created using intercept model , two factor outcomes used, log-odds reflect event interest first level factor. novel levels, slightly timmed average coefficients returned. hierarchical generalized linear model fit using lme4::lmer() lme4::glmer(), depending nature outcome, intercept via ... include family argument (automatically set step) well arguments given options argument step. Relevant options include control others.","code":"lmer(outcome ~ 1 + (1 | predictor), data = data, ...)"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"tidy() step, tibble retruned columns level, value, terms, id: level character, factor levels value numeric, encoding terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"Micci-Barreca D (2001) \"preprocessing scheme high-cardinality categorical attributes classification prediction problems,\" ACM SIGKDD Explorations Newsletter, 3(1), 27-32. Zumel N Mount J (2017) \"vtreat: data.frame Processor Predictive Modeling,\" arXiv:1611.09477","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"","code":"library(recipes) library(dplyr) library(modeldata) data(grants) set.seed(1) grants_other <- sample_n(grants_other, 500) # \\donttest{ reencoded <- recipe(class ~ sponsor_code, data = grants_other) %>% step_lencode_mixed(sponsor_code, outcome = vars(class)) # }"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":null,"dir":"Reference","previous_headings":"","what":"Sparse PCA Signal Extraction — step_pca_sparse","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"step_pca_sparse() creates specification recipe step convert numeric data one principal components can zero coefficients.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"","code":"step_pca_sparse( recipe, ..., role = \"predictor\", trained = FALSE, num_comp = 5, predictor_prop = 1, options = list(), res = NULL, prefix = \"PC\", keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"pca_sparse\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables used compute components. See selections() details. tidy method, currently used. role model terms created step, analysis role assigned? default, function assumes new principal component columns created original variables used predictors model. trained logical indicate quantities preprocessing estimated. num_comp number components retain new predictors. num_comp greater number columns number possible components, smaller value used. num_comp = 0 set transformation done selected variables stay unchanged, regardless value keep_original_cols. predictor_prop maximum number original predictors can non-zero coefficients PCA component (via regularization). options list options default method irlba::ssvd(). res rotation matrix preprocessing step trained prep(). prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables selected), value (loading), component.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"irlba package required step. installed, user prompted step defined. irlba::ssvd() function used encourage sparsity; documentation details method. argument num_comp controls number components retained (original variables used derive components removed data). new components names begin prefix sequence numbers. variable names padded zeros. example, num_comp < 10, names PC1 - PC9. num_comp = 101, names PC1 - PC101.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"tidy() step, tibble retruned columns terms, value, component, id: terms character, selectors variables selected value numeric, variable loading component character, principle component id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"step 2 tuning parameters: num_comp: # Components (type: integer, default: 5) predictor_prop: Proportion Predictors (type: double, default: 1)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"underlying operation allow case weights.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"","code":"library(recipes) library(ggplot2) data(ad_data, package = \"modeldata\") ad_rec <- recipe(Class ~ ., data = ad_data) %>% step_zv(all_predictors()) %>% step_YeoJohnson(all_numeric_predictors()) %>% step_normalize(all_numeric_predictors()) %>% step_pca_sparse( all_numeric_predictors(), predictor_prop = 0.75, num_comp = 3, id = \"sparse pca\" ) %>% prep() tidy(ad_rec, id = \"sparse pca\") %>% mutate(value = ifelse(value == 0, NA, value)) %>% ggplot(aes(x = component, y = terms, fill = value)) + geom_tile() + scale_fill_gradient2() + theme(axis.text.y = element_blank())"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":null,"dir":"Reference","previous_headings":"","what":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"step_pca_sparse_bayes() creates specification recipe step convert numeric data one principal components can zero coefficients.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"","code":"step_pca_sparse_bayes( recipe, ..., role = \"predictor\", trained = FALSE, num_comp = 5, prior_slab_dispersion = 1, prior_mixture_threshold = 0.1, options = list(), res = NULL, prefix = \"PC\", keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"pca_sparse_bayes\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables used compute components. See selections() details. tidy method, currently used. role model terms created step, analysis role assigned? default, function assumes new principal component columns created original variables used predictors model. trained logical indicate quantities preprocessing estimated. num_comp number components retain new predictors. num_comp greater number columns number possible components, smaller value used. num_comp = 0 set transformation done selected variables stay unchanged, regardless value keep_original_cols. prior_slab_dispersion value proportional dispersion (scale) parameter slab portion prior. Smaller values result increase zero coefficients. prior_mixture_threshold parameter defines trade-spike slab components prior. Increasing parameter increases number zero coefficients. options list options default method VBsparsePCA::VBsparsePCA(). res rotation matrix preprocessing step trained prep(). prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables selected), value (loading), component.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"VBsparsePCA package required step. installed, user prompted step defined. spike--slab prior mixture two priors. One (\"spike\") mass zero represents variable contribution PCA coefficients. prior broader distribution reflects coefficient distribution variables affect PCA analysis. \"slab\". narrower slab, likely coefficient zero (regularized closer zero). mixture two priors governed mixing parameter, prior distribution hyper-parameter prior. PCA coefficients resulting scores unique sign. step attempt make sign components consistent run--run. However, sparsity constraint may interfere goal. argument num_comp controls number components retained (original variables used derive components removed data). new components names begin prefix sequence numbers. variable names padded zeros. example, num_comp < 10, names PC1 - PC9. num_comp = 101, names PC1 - PC101.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"tidy() step, tibble retruned columns terms, value, component, id: terms character, selectors variables selected value numeric, variable loading component character, principle component id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"step 3 tuning parameters: num_comp: # Components (type: integer, default: 5) prior_slab_dispersion: Dispersion Slab Prior (type: double, default: 1) prior_mixture_threshold: Threshold Mixture Prior (type: double, default: 0.1)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"Ning, B. (2021). Spike slab Bayesian sparse principal component analysis. arXiv:2102.00305.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"","code":"library(recipes) library(ggplot2) data(ad_data, package = \"modeldata\") ad_rec <- recipe(Class ~ ., data = ad_data) %>% step_zv(all_predictors()) %>% step_YeoJohnson(all_numeric_predictors()) %>% step_normalize(all_numeric_predictors()) %>% step_pca_sparse_bayes( all_numeric_predictors(), prior_mixture_threshold = 0.95, prior_slab_dispersion = 0.05, num_comp = 3, id = \"sparse bayesian pca\" ) %>% prep() tidy(ad_rec, id = \"sparse bayesian pca\") %>% mutate(value = ifelse(value == 0, NA, value)) %>% ggplot(aes(x = component, y = terms, fill = value)) + geom_tile() + scale_fill_gradient2() + theme(axis.text.y = element_blank())"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":null,"dir":"Reference","previous_headings":"","what":"Truncated PCA Signal Extraction — step_pca_truncated","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"step_pca_truncated() creates specification recipe step convert numeric data one principal components. truncated calculates number components asked instead done recipes::step_pca().","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"","code":"step_pca_truncated( recipe, ..., role = \"predictor\", trained = FALSE, num_comp = 5, options = list(), res = NULL, columns = NULL, prefix = \"PC\", keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"pca_truncated\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables step. See selections() details. role model terms created step, analysis role assigned? default, new columns created step original variables used predictors model. trained logical indicate quantities preprocessing estimated. num_comp number components retain new predictors. num_comp greater number columns number possible components, smaller value used. num_comp = 0 set transformation done selected variables stay unchanged, regardless value keep_original_cols. options list options default method irlba::prcomp_irlba(). Argument defaults set retx = FALSE, center = FALSE, scale. = FALSE, tol = NULL. Note argument x passed (). res irlba::prcomp_irlba() object stored preprocessing step trained prep(). columns character string selected variable names. field placeholder populated prep() used. prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"Principal component analysis (PCA) transformation group variables produces new set artificial features components. components designed capture maximum amount information (.e. variance) original variables. Also, components statistically independent one another. means can used combat large inter-variables correlations data set. advisable standardize variables prior running PCA. , variable centered scaled prior PCA calculation. can changed using options argument using step_center() step_scale(). argument num_comp controls number components retained (original variables used derive components removed data). new components names begin prefix sequence numbers. variable names padded zeros. example, num_comp < 10, names PC1 - PC9. num_comp = 101, names PC1 - PC101.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"tidy() step two things can happen depending type argument. type = \"coef\" tibble returned 4 columns terms, value, component , id: terms character, selectors variables selected value numeric, variable loading component character, principle component id character, id step type = \"variance\" tibble returned 4 columns terms, value, component , id: terms character, type variance value numeric, value variance component integer, principle component id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"step 1 tuning parameters: num_comp: # Components (type: integer, default: 5)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"step performs unsupervised operation can utilize case weights. result, case weights used frequency weights. information, see documentation case_weights examples tidymodels.org.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"Jolliffe, . T. (2010). Principal Component Analysis. Springer.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"","code":"rec <- recipe(~., data = mtcars) pca_trans <- rec %>% step_normalize(all_numeric()) %>% step_pca_truncated(all_numeric(), num_comp = 2) pca_estimates <- prep(pca_trans, training = mtcars) pca_data <- bake(pca_estimates, mtcars) rng <- extendrange(c(pca_data$PC1, pca_data$PC2)) plot(pca_data$PC1, pca_data$PC2, xlim = rng, ylim = rng ) tidy(pca_trans, number = 2) #> # A tibble: 1 × 4 #> terms value component id #> #> 1 all_numeric() NA NA pca_truncated_AGa8C tidy(pca_estimates, number = 2) #> # A tibble: 22 × 4 #> terms value component id #> #> 1 mpg 0.363 PC1 pca_truncated_AGa8C #> 2 cyl -0.374 PC1 pca_truncated_AGa8C #> 3 disp -0.368 PC1 pca_truncated_AGa8C #> 4 hp -0.330 PC1 pca_truncated_AGa8C #> 5 drat 0.294 PC1 pca_truncated_AGa8C #> 6 wt -0.346 PC1 pca_truncated_AGa8C #> 7 qsec 0.200 PC1 pca_truncated_AGa8C #> 8 vs 0.307 PC1 pca_truncated_AGa8C #> 9 am 0.235 PC1 pca_truncated_AGa8C #> 10 gear 0.207 PC1 pca_truncated_AGa8C #> # ℹ 12 more rows"},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"step_umap() creates specification recipe step project set features smaller space.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"","code":"step_umap( recipe, ..., role = \"predictor\", trained = FALSE, outcome = NULL, neighbors = 15, num_comp = 2, min_dist = 0.01, metric = \"euclidean\", learn_rate = 1, epochs = NULL, initial = \"spectral\", target_weight = 0.5, options = list(verbose = FALSE, n_threads = 1), seed = sample(10^5, 2), prefix = \"UMAP\", keep_original_cols = FALSE, retain = deprecated(), object = NULL, skip = FALSE, id = rand_id(\"umap\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables step. See selections() details. role model terms created step, analysis role assigned? default, new columns created step original variables used predictors model. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome encoding process (). neighbors integer number nearest neighbors used construct target simplicial set. neighbors greater number data points, smaller value used. num_comp integer number UMAP components. num_comp greater number selected columns minus one, smaller value used. min_dist effective minimum distance embedded points. metric Character, type distance metric use find nearest neighbors. See uwot::umap() details. Default \"euclidean\". learn_rate Positive number learning rate optimization process. epochs Number iterations neighbor optimization. See uwot::umap() details. initial Character, Type initialization coordinates. Can one \"spectral\", \"normlaplacian\", \"random\", \"lvrandom\", \"laplacian\", \"pca\", \"spca\", \"agspectral\", matrix initial coordinates. See uwot::umap() details. Default \"spectral\". target_weight Weighting factor data topology target topology. value 0.0 weights entirely data, value 1.0 weights entirely target. default 0.5 balances weighting equally data target. options list options pass uwot::umap(). arguments X, n_neighbors, n_components, min_dist, n_epochs, ret_model, learning_rate passed . default, verbose n_threads set. seed Two integers control random numbers used numerical methods. default pulls main session's stream numbers give reproducible results seed set prior calling prep() bake(). prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. retain Use keep_original_cols instead specify whether original predictors retained along new embedding variables. object object defines encoding. NULL step trained recipes::prep(). skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"UMAP, short Uniform Manifold Approximation Projection, nonlinear dimension reduction technique finds local, low-dimensional representations data. can run unsupervised supervised different types outcome data (e.g. numeric, factor, etc). argument num_comp controls number components retained (original variables used derive components removed data). new components names begin prefix sequence numbers. variable names padded zeros. example, num_comp < 10, names UMAP1 - UMAP9. num_comp = 101, names UMAP1 - UMAP101.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"tidy() step, tibble retruned columns terms id: terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"step 5 tuning parameters: num_comp: # Components (type: integer, default: 2) neighbors: # Nearest Neighbors (type: integer, default: 15) min_dist: Min Distance Points (type: double, default: 0.01) learn_rate: Learning Rate (type: double, default: 1) epochs: # Epochs (type: integer, default: NULL)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"saving-prepped-recipe-object","dir":"Reference","previous_headings":"","what":"Saving prepped recipe object","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"recipe step may require native serialization saving use another R session. learn serialization prepped recipes, see bundle package.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"McInnes, L., & Healy, J. (2018). UMAP: Uniform Manifold Approximation Projection Dimension Reduction. https://arxiv.org/abs/1802.03426. \"UMAP Works\" https://umap-learn.readthedocs.io/en/latest/how_umap_works.html","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"","code":"if (FALSE) { # rlang::is_installed(\"ggplot2\") && rlang::is_installed(\"irlba\", version = \"2.3.5.2\") library(recipes) library(ggplot2) split <- seq.int(1, 150, by = 9) tr <- iris[-split, ] te <- iris[split, ] set.seed(11) supervised <- recipe(Species ~ ., data = tr) %>% step_center(all_predictors()) %>% step_scale(all_predictors()) %>% step_umap(all_predictors(), outcome = vars(Species), num_comp = 2) %>% prep(training = tr) theme_set(theme_bw()) bake(supervised, new_data = te, Species, starts_with(\"umap\")) %>% ggplot(aes(x = UMAP1, y = UMAP2, col = Species)) + geom_point(alpha = .5) }"},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":null,"dir":"Reference","previous_headings":"","what":"Weight of evidence transformation — step_woe","title":"Weight of evidence transformation — step_woe","text":"step_woe() creates specification recipe step transform nominal data numerical transformation based weights evidence binary outcome.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Weight of evidence transformation — step_woe","text":"","code":"step_woe( recipe, ..., role = \"predictor\", outcome, trained = FALSE, dictionary = NULL, Laplace = 1e-06, prefix = \"woe\", keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"woe\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Weight of evidence transformation — step_woe","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables used compute components. See selections() details. tidy method, currently used. role model terms created step, analysis role assigned?. default, function assumes new woe components columns created original variables used predictors model. outcome bare name binary outcome encased vars(). trained logical indicate quantities preprocessing estimated. dictionary tbl. map levels woe values. must layout output returned dictionary(). NULL function build dictionary variables passed .... See dictionary() details. Laplace Laplace smoothing parameter. value usually applied avoid -Inf/Inf predictor category one outcome class. Set 0 allow Inf/-Inf. default 1e-6. Also known 'pseudocount' parameter Laplace smoothing technique. prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Weight of evidence transformation — step_woe","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble woe dictionary used map categories woe values.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Weight of evidence transformation — step_woe","text":"WoE transformation group variables produces new set features. formula $$woe_c = log((P(X = c|Y = 1))/(P(X = c|Y = 0)))$$ \\(c\\) goes 1 \\(C\\) levels given nominal predictor variable \\(X\\). components designed transform nominal variables numerical ones property order magnitude reflects association binary outcome. apply numerical predictors, advisable discretize variables prior running WoE. , variable binarized woe associated later. can achieved using step_discretize(). argument Laplace small quantity added proportions 1's 0's goal avoid log(p/0) log(0/p) results. numerical woe versions names begin woe_ followed respective original name variables. See Good (1985). One can pass custom dictionary tibble step_woe(). must structure output dictionary() (see examples). provided created automatically. role tibble store map levels nominal predictor woe values. may want tweak object goal fix orders levels one given predictor. One easy way tweaking output returned dictionary().","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Weight of evidence transformation — step_woe","text":"tidy() step, tibble columns terms (selectors variables selected), value, n_tot, n_bad, n_good, p_bad, p_good, woe outcome returned.. See dictionary() information. tidy() step, tibble retruned columns terms value, n_tot, n_bad, n_good, p_bad, p_good, woe outcome id: terms character, selectors variables selected value character, level outcome n_tot integer, total number n_bad integer, number bad examples n_good integer, number good examples p_bad numeric, p bad examples p_good numeric, p good examples woe numeric, weight evidence outcome character, name outcome variable id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Weight of evidence transformation — step_woe","text":"step 1 tuning parameters: Laplace: Laplace Correction (type: double, default: 1e-06)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Weight of evidence transformation — step_woe","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Weight of evidence transformation — step_woe","text":"Kullback, S. (1959). Information Theory Statistics. Wiley, New York. Hastie, T., Tibshirani, R. Friedman, J. (1986). Elements Statistical Learning, Second Edition, Springer, 2009. Good, . J. (1985), \"Weight evidence: brief survey\", Bayesian Statistics, 2, pp.249-270.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Weight of evidence transformation — step_woe","text":"","code":"library(modeldata) data(\"credit_data\") set.seed(111) in_training <- sample(1:nrow(credit_data), 2000) credit_tr <- credit_data[in_training, ] credit_te <- credit_data[-in_training, ] rec <- recipe(Status ~ ., data = credit_tr) %>% step_woe(Job, Home, outcome = vars(Status)) woe_models <- prep(rec, training = credit_tr) #> Warning: Some columns used by `step_woe()` have categories with less than 10 values: 'Home', 'Job' # the encoding: bake(woe_models, new_data = credit_te %>% slice(1:5), starts_with(\"woe\")) #> # A tibble: 5 × 2 #> woe_Job woe_Home #> #> 1 -0.451 0.519 #> 2 0.187 -0.512 #> 3 -0.451 -0.512 #> 4 0.187 -0.512 #> 5 1.51 -0.0519 # the original data credit_te %>% slice(1:5) %>% dplyr::select(Job, Home) #> Job Home #> 1 fixed rent #> 2 freelance owner #> 3 fixed owner #> 4 freelance owner #> 5 partime parents # the details: tidy(woe_models, number = 1) #> # A tibble: 12 × 10 #> terms value n_tot n_bad n_good p_bad p_good woe outcome id #> #> 1 Job fixed 1261 273 988 0.451 0.708 -0.451 Status woe_… #> 2 Job freelan… 463 159 304 0.263 0.218 0.187 Status woe_… #> 3 Job others 74 39 35 0.0645 0.0251 0.944 Status woe_… #> 4 Job partime 201 133 68 0.220 0.0487 1.51 Status woe_… #> 5 Job NA 1 1 0 0.00165 0 14.7 Status woe_… #> 6 Home ignore 8 4 4 0.00661 0.00287 0.835 Status woe_… #> 7 Home other 161 78 83 0.129 0.0595 0.773 Status woe_… #> 8 Home owner 931 192 739 0.317 0.530 -0.512 Status woe_… #> 9 Home parents 336 98 238 0.162 0.171 -0.0519 Status woe_… #> 10 Home priv 113 42 71 0.0694 0.0509 0.310 Status woe_… #> 11 Home rent 446 188 258 0.311 0.185 0.519 Status woe_… #> 12 Home NA 5 3 2 0.00496 0.00143 1.24 Status woe_… # Example of custom dictionary + tweaking # custom dictionary woe_dict_custom <- credit_tr %>% dictionary(Job, Home, outcome = \"Status\") woe_dict_custom[4, \"woe\"] <- 1.23 # tweak # passing custom dict to step_woe() rec_custom <- recipe(Status ~ ., data = credit_tr) %>% step_woe( Job, Home, outcome = vars(Status), dictionary = woe_dict_custom ) %>% prep() #> Warning: Some columns used by `step_woe()` have categories with less than 10 values: 'Home', 'Job' rec_custom_baked <- bake(rec_custom, new_data = credit_te) rec_custom_baked %>% dplyr::filter(woe_Job == 1.23) %>% head() #> # A tibble: 6 × 14 #> Seniority Time Age Marital Records Expenses Income Assets Debt #> #> 1 0 48 41 married no 90 80 0 0 #> 2 0 18 21 single yes 35 50 0 0 #> 3 0 36 23 single no 45 122 2500 0 #> 4 14 24 51 married no 75 198 1000 0 #> 5 1 60 26 single no 35 120 0 0 #> 6 1 36 24 married no 76 164 0 0 #> # ℹ 5 more variables: Amount , Price , Status , #> # woe_Job , woe_Home "},{"path":"https://embed.tidymodels.org/dev/reference/tunable_embed.html","id":null,"dir":"Reference","previous_headings":"","what":"tunable methods for embed — tunable.step_discretize_cart","title":"tunable methods for embed — tunable.step_discretize_cart","text":"functions define parameters can tuned specific steps. also define recommended objects dials package can used generate new parameter values characteristics.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/tunable_embed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"tunable methods for embed — tunable.step_discretize_cart","text":"","code":"# S3 method for class 'step_discretize_cart' tunable(x, ...) # S3 method for class 'step_discretize_xgb' tunable(x, ...) # S3 method for class 'step_embed' tunable(x, ...) # S3 method for class 'step_pca_sparse' tunable(x, ...) # S3 method for class 'step_pca_sparse_bayes' tunable(x, ...) # S3 method for class 'step_umap' tunable(x, ...) # S3 method for class 'step_woe' tunable(x, ...)"},{"path":"https://embed.tidymodels.org/dev/reference/tunable_embed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"tunable methods for embed — tunable.step_discretize_cart","text":"x recipe step object ... used.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/tunable_embed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"tunable methods for embed — tunable.step_discretize_cart","text":"tibble object.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":null,"dir":"Reference","previous_headings":"","what":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"Calculates summaries WoE (Weight Evidence) binary outcome given predictor variable. Used biuld dictionary.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"","code":"woe_table(predictor, outcome, Laplace = 1e-06, call = rlang::caller_env(0))"},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"predictor atomic vector, usualy distinct values. outcome dependent variable. atomic vector exactly 2 distinct values. Laplace pseudocount parameter Laplace Smoothing estimator. Default 1e-6. Value avoid -Inf/Inf predictor category one outcome class. Set 0 allow Inf/-Inf. call execution environment currently running function, e.g. caller_env(). function mentioned error messages source error. See call argument rlang::abort() information.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"tibble counts, proportions woe. Warning: woe can possibly -Inf. Use 'Laplace' arg avoid .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"Kullback, S. (1959). Information Theory Statistics. Wiley, New York. Hastie, T., Tibshirani, R. Friedman, J. (1986). Elements Statistical Learning, Second Edition, Springer, 2009. Good, . J. (1985), \"Weight evidence: brief survey\", Bayesian Statistics, 2, pp.249-270.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-114","dir":"Changelog","previous_headings":"","what":"embed 1.1.4","title":"embed 1.1.4","text":"CRAN release: 2024-03-20","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"improvements-1-1-4","dir":"Changelog","previous_headings":"","what":"Improvements","title":"embed 1.1.4","text":"step_umap() gained initial target_weight arguments. (#213) Calling ?tidy.step_*() now sends documentation step_*() outcome documented. (#216) Documentation tidy methods steps improved describe return value accurately. (#217) {keras} {tensorflow} moved Suggests instead Imports. (#218)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-113","dir":"Changelog","previous_headings":"","what":"embed 1.1.3","title":"embed 1.1.3","text":"CRAN release: 2023-10-28 step_collapse_stringdist() now return predictors factors. (#204) Fixed regression 1.1.2 step_lencode_glm() couldn’t used multiple columns.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-112","dir":"Changelog","previous_headings":"","what":"embed 1.1.2","title":"embed 1.1.2","text":"CRAN release: 2023-08-17","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"improvements-1-1-2","dir":"Changelog","previous_headings":"","what":"Improvements","title":"embed 1.1.2","text":"keep_original_cols argument added step_woe(). change mean every step produces new columns keep_original_cols argument. (#194) Many internal changes improve consistency slight speed increases.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"breaking-changes-1-1-2","dir":"Changelog","previous_headings":"","what":"Breaking Changes","title":"embed 1.1.2","text":"step_pca_sparse(), step_pca_truncated() step_pca_sparse_bayes() now returns data unaltered num_comp = 0. done consistent recipes steps nature. (#190)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-111","dir":"Changelog","previous_headings":"","what":"embed 1.1.1","title":"embed 1.1.1","text":"CRAN release: 2023-05-30","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"bug-fixes-1-1-1","dir":"Changelog","previous_headings":"","what":"Bug Fixes","title":"embed 1.1.1","text":"Fixed bug step_pca_truncated() didn’t work zero selection. (#181) tidy() methods step_discretize_cart(), step_discretize_xgb(), step_embed(), step_feature_hash(), step_lencode_bayes(), step_lencode_glm(), step_lencode_mixed(), step_pca_sparse(), step_pca_sparse_bayes(), step_pca_truncated(), step_umap(), step_woe() now correctly return zero-row tibbles used empty selections. (#181)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-110","dir":"Changelog","previous_headings":"","what":"embed 1.1.0","title":"embed 1.1.0","text":"CRAN release: 2023-04-14","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"new-steps-1-1-0","dir":"Changelog","previous_headings":"","what":"New Steps","title":"embed 1.1.0","text":"step_pca_truncated() added. step calculates components required, speedup cases used many variables. (#82)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"improvements-1-1-0","dir":"Changelog","previous_headings":"","what":"Improvements","title":"embed 1.1.0","text":"step_collapse_stringdist() gained method options arguments allow different types string distance calculations. (#152) step_umap() gained argument metric. (#154) step_embed() gained keep_original_cols argument. (#176) steps now required_pkgs() methods. Steps tunable arguments now arguments listed documentation. steps add new columns now informatively error name collision occurs.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-100","dir":"Changelog","previous_headings":"","what":"embed 1.0.0","title":"embed 1.0.0","text":"CRAN release: 2022-07-02 step_collapse_cart() can pool predictor’s factor levels using tree-based method. step_collapse_stringdist() can pool predictor’s factor levels using string distances. Case weights support added step_discretize_cart(), step_discretize_xgb(), step_lencode_bayes(), step_lencode_glm(), step_lencode_mixed().","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-020","dir":"Changelog","previous_headings":"","what":"embed 0.2.0","title":"embed 0.2.0","text":"CRAN release: 2022-04-13 step_embed() now correctly defaults random id word “embed”. (#102) step_feature_hash() soft deprecated embed favor step_dummy_hash() textrecipes. (#95) Steps now dedicated subsection detailing happens tidy() applied. (#105) Reorganize documentation recipe step tidy methods (#115). Fixed bug woe_table() step_woe() didn’t respect factor levels outcome. (109)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-015","dir":"Changelog","previous_headings":"","what":"embed 0.1.5","title":"embed 0.1.5","text":"CRAN release: 2021-11-24 Re-licensed package GPL-2 MIT. See consent copyright holders . tunable parameter ranges step_umap() changed neighbors, num_comp, min_dist prevent uwot segmentation faults. step also check see data dimensions consistent argument values. Two new PCA steps added, using sparse techniques estimation: step_pca_sparse() step_pca_sparse_bayes(). Updated use recipes_eval_select() recipes 0.1.17 (#85). Added prefix argument step_umap() harmonize recipes steps (#93). embed recipe steps now officially support empty selections aligned recipes, dplyr packages use tidyselect. step_woe() longer warns high-cardinality predictors recipe estimated. Instead warns categories fewer 10 data points training set. (#74)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-014","dir":"Changelog","previous_headings":"","what":"embed 0.1.4","title":"embed 0.1.4","text":"CRAN release: 2021-01-16 Minor release changes test cases CRAN get xgboost work Solaris configuration. lme4 rstanarm now Suggests list automatically installed embed. message written console packages missing associated steps functions invoked.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-013","dir":"Changelog","previous_headings":"","what":"embed 0.1.3","title":"embed 0.1.3","text":"CRAN release: 2020-11-12 changes enable better parallel processing windows.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-012","dir":"Changelog","previous_headings":"","what":"embed 0.1.2","title":"embed 0.1.2","text":"CRAN release: 2020-10-17 Changes enable better parallel processing windows.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-011","dir":"Changelog","previous_headings":"","what":"embed 0.1.1","title":"embed 0.1.1","text":"CRAN release: 2020-07-03 Changes tests get archive jail. Updated plumbing behind step_woe(). Due bug tensorflow, added “warm start” instigate TF session one currently exist.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-010","dir":"Changelog","previous_headings":"","what":"embed 0.1.0","title":"embed 0.1.0","text":"CRAN release: 2020-05-25 Changes dplyr 1.0.0","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"new-steps-0-1-0","dir":"Changelog","previous_headings":"","what":"New Steps","title":"embed 0.1.0","text":"step_discretize_xgb() step_discretize_cart() can used convert numeric predictors categorical using supervised binning methods based tree models. Thanks Konrad Semsch contribution. Added step_feature_hash() creating dummy variables using feature hashing.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"breaking-changes-0-1-0","dir":"Changelog","previous_headings":"","what":"Breaking Changes","title":"embed 0.1.0","text":"tidy.step_woe() now column names consistent recipe steps.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"bug-fixes-0-1-0","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"embed 0.1.0","text":"Fixed bug detecting TF version.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-006","dir":"Changelog","previous_headings":"","what":"embed 0.0.6","title":"embed 0.0.6","text":"CRAN release: 2020-03-17 Small changes base R’s stringsAsFactors change.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-005","dir":"Changelog","previous_headings":"","what":"embed 0.0.5","title":"embed 0.0.5","text":"CRAN release: 2020-01-07 example data now modeldata package. Small TF updates step_embed().","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-004","dir":"Changelog","previous_headings":"","what":"embed 0.0.4","title":"embed 0.0.4","text":"CRAN release: 2019-09-15 Methods added future generic called tunable(). outlines parameters step can/tuned. Small updates work different versions tidyr.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-003","dir":"Changelog","previous_headings":"","what":"embed 0.0.3","title":"embed 0.0.3","text":"CRAN release: 2019-07-12","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"new-steps-0-0-3","dir":"Changelog","previous_headings":"","what":"New Steps","title":"embed 0.0.3","text":"step_umap() added supervised unsupervised encodings. step_woe() created weight evidence encodings.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-002","dir":"Changelog","previous_headings":"","what":"embed 0.0.2","title":"embed 0.0.2","text":"CRAN release: 2018-11-19 mostly maintainence release compatible version 0.1.3 recipes.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"other-changes-0-0-2","dir":"Changelog","previous_headings":"","what":"Other Changes:","title":"embed 0.0.2","text":"package now depends generics pacakge get broom tidy methods. Karim Lahrichi added ability use callbacks fitting tensorflow models. PR","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-001","dir":"Changelog","previous_headings":"","what":"embed 0.0.1","title":"embed 0.0.1","text":"CRAN release: 2018-09-14 First CRAN version","code":""}]
+[{"path":[]},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://embed.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":null,"dir":"","previous_headings":"","what":"Contributing to embed","title":"Contributing to embed","text":"outlines propose change embed. detailed info contributing , tidyverse packages, please see development contributing guide.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"fixing-typos","dir":"","previous_headings":"","what":"Fixing typos","title":"Contributing to embed","text":"can fix typos, spelling mistakes, grammatical errors documentation directly using GitHub web interface, long changes made source file. generally means ’ll need edit roxygen2 comments .R, .Rd file. can find .R file generates .Rd reading comment first line.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"bigger-changes","dir":"","previous_headings":"","what":"Bigger changes","title":"Contributing to embed","text":"want make bigger change, ’s good idea first file issue make sure someone team agrees ’s needed. ’ve found bug, please file issue illustrates bug minimal reprex (also help write unit test, needed).","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"pull-request-process","dir":"","previous_headings":"Bigger changes","what":"Pull request process","title":"Contributing to embed","text":"Fork package clone onto computer. haven’t done , recommend using usethis::create_from_github(\"topepo/embed\", fork = TRUE). Install development dependences devtools::install_dev_deps(), make sure package passes R CMD check running devtools::check(). R CMD check doesn’t pass cleanly, ’s good idea ask help continuing. Create Git branch pull request (PR). recommend using usethis::pr_init(\"brief-description--change\"). Make changes, commit git, create PR running usethis::pr_push(), following prompts browser. title PR briefly describe change. body PR contain Fixes #issue-number. user-facing changes, add bullet top NEWS.md (.e. just first header). Follow style described https://style.tidyverse.org/news.html.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"code-style","dir":"","previous_headings":"Bigger changes","what":"Code style","title":"Contributing to embed","text":"New code follow tidyverse style guide. can use styler package apply styles, please don’t restyle code nothing PR. use roxygen2, Markdown syntax, documentation. use testthat unit tests. Contributions test cases included easier accept.","code":""},{"path":"https://embed.tidymodels.org/dev/CONTRIBUTING.html","id":"code-of-conduct","dir":"","previous_headings":"","what":"Code of Conduct","title":"Contributing to embed","text":"Please note embed project released Contributor Code Conduct. contributing project agree abide terms.","code":""},{"path":"https://embed.tidymodels.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 embed authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://embed.tidymodels.org/dev/articles/Applications/GLM.html","id":"no-pooling","dir":"Articles > Applications","previous_headings":"","what":"No Pooling","title":"Using Generalized Linear Models","text":"case, effect sponsor code can estimated separately factor level. One method conducting estimation step fit logistic regression acceptance classification outcome sponsor code predictor. , log-odds naturally estimated logistic regression. data, recipe created step_lencode_glm used: tidy method can used extract encodings merged raw estimates: sponsor codes n > 1, estimates effectively : Note also effect used novel sponsor code future data sets average effect:","code":"grants_glm <- recipe(class ~ ., data = grants_other) %>% # specify the variable being encoded and the outcome step_lencode_glm(sponsor_code, outcome = vars(class)) %>% # estimate the effects prep(training = grants_other) glm_estimates <- tidy(grants_glm, number = 1) %>% dplyr::select(-terms, -id) glm_estimates ## # A tibble: 292 × 2 ## level value ## ## 1 100D 0.405 ## 2 101A -1.95 ## 3 103C -2.08 ## 4 105A -1.61 ## 5 107C 16.6 ## 6 10B 16.6 ## 7 111C -1.61 ## 8 112D 0.693 ## 9 113A 0 ## 10 118B 0 ## # ℹ 282 more rows glm_estimates <- glm_estimates %>% set_names(c(\"sponsor_code\", \"glm\")) %>% inner_join(props, by = \"sponsor_code\") glm_estimates %>% dplyr::filter(is.finite(log_odds)) %>% mutate(difference = log_odds - glm) %>% dplyr::select(difference) %>% summary() ## difference ## Min. :-1.332e-15 ## 1st Qu.:-2.220e-16 ## Median : 0.000e+00 ## Mean :-5.139e-17 ## 3rd Qu.: 1.604e-16 ## Max. : 8.882e-16 tidy(grants_glm, number = 1) %>% dplyr::filter(level == \"..new\") %>% select(-id) ## # A tibble: 1 × 3 ## level value terms ## ## 1 ..new -2.88 sponsor_code"},{"path":"https://embed.tidymodels.org/dev/articles/Applications/GLM.html","id":"partial-pooling","dir":"Articles > Applications","previous_headings":"","what":"Partial Pooling","title":"Using Generalized Linear Models","text":"method estimates effects using sponsor codes using hierarchical Bayesian generalized linear model. sponsor codes treated random set contributes random intercept previously used logistic regression. Partial pooling estimates effect combination separate empirical estimates log-odds prior distribution. sponsor codes small sample sizes, final estimate shrunken towards overall mean log-odds. makes sense since poor information estimating sponsor codes. sponsor codes many data points, estimates reply empirical estimates. page good discussion pooling using Bayesian models.","code":"# due to Matrix problems knitr::knit_exit()"},{"path":"https://embed.tidymodels.org/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Emil Hvitfeldt. Author, maintainer. Max Kuhn. Author. . Copyright holder, funder.","code":""},{"path":"https://embed.tidymodels.org/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Hvitfeldt E, Kuhn M (2024). embed: Extra Recipes Encoding Predictors. R package version 1.1.4.9000, https://github.com/tidymodels/embed, https://embed.tidymodels.org.","code":"@Manual{, title = {embed: Extra Recipes for Encoding Predictors}, author = {Emil Hvitfeldt and Max Kuhn}, year = {2024}, note = {R package version 1.1.4.9000, https://github.com/tidymodels/embed}, url = {https://embed.tidymodels.org}, }"},{"path":[]},{"path":"https://embed.tidymodels.org/dev/index.html","id":"introduction","dir":"","previous_headings":"","what":"Introduction","title":"Extra Recipes for Encoding Predictors","text":"embed extra steps recipes package embedding predictors one numeric columns. Almost preprocessing methods supervised. steps available separate package step dependencies, rstanarm, lme4, keras, fairly heavy. steps handle categorical predictors: step_lencode_glm(), step_lencode_bayes(), step_lencode_mixed() estimate effect factor levels outcome estimates used new encoding. estimates estimated generalized linear model. step can executed without pooling (via glm) partial pooling (stan_glm lmer). Currently implemented numeric two-class outcomes. step_embed() uses keras::layer_embedding translate original C factor levels set D new variables (< C). model fitting routine optimizes factor levels mapped new variables well corresponding regression coefficients (.e., neural network weights) used new encodings. step_woe() creates new variables based weight evidence encodings. step_feature_hash() can create indicator variables using feature hashing. numeric predictors: step_umap() uses nonlinear transformation similar t-SNE can used project transformation new data. supervised unsupervised methods can used. step_discretize_xgb() step_discretize_cart() can make binned versions numeric predictors using supervised tree-based models. step_pca_sparse() step_pca_sparse_bayes() conduct feature extraction sparsity component loadings. references methods : Francois C Allaire JJ (2018) Deep Learning R, Manning Guo, C Berkhahn F (2016) “Entity Embeddings Categorical Variables” Micci-Barreca D (2001) “preprocessing scheme high-cardinality categorical attributes classification prediction problems,” ACM SIGKDD Explorations Newsletter, 3(1), 27-32. Zumel N Mount J (2017) “vtreat: data.frame Processor Predictive Modeling” McInnes L Healy J (2018) UMAP: Uniform Manifold Approximation Projection Dimension Reduction Good, . J. (1985), “Weight evidence: brief survey”, Bayesian Statistics, 2, pp.249-270.","code":""},{"path":"https://embed.tidymodels.org/dev/index.html","id":"getting-started","dir":"","previous_headings":"","what":"Getting Started","title":"Extra Recipes for Encoding Predictors","text":"two articles walk use embedding steps, using generalized linear models neural networks built via TensorFlow.","code":""},{"path":"https://embed.tidymodels.org/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Extra Recipes for Encoding Predictors","text":"install package: Note use steps, also install packages rstanarm lme4. steps work, may want use: get bug fix use feature development version, can install development version package GitHub.","code":"install.packages(\"embed\") install.packages(c(\"rpart\", \"xgboost\", \"rstanarm\", \"lme4\")) # install.packages(\"pak\") pak::pak(\"tidymodels/embed\")"},{"path":"https://embed.tidymodels.org/dev/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Extra Recipes for Encoding Predictors","text":"project released Contributor Code Conduct. contributing project, agree abide terms. questions discussions tidymodels packages, modeling, machine learning, please post RStudio Community. think encountered bug, please submit issue. Either way, learn create share reprex (minimal, reproducible example), clearly communicate code. Check details contributing guidelines tidymodels packages get help.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":null,"dir":"Reference","previous_headings":"","what":"Add WoE in a data frame — add_woe","title":"Add WoE in a data frame — add_woe","text":"tidyverse friendly way plug WoE versions set predictor variables given binary outcome.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add WoE in a data frame — add_woe","text":"","code":"add_woe(.data, outcome, ..., dictionary = NULL, prefix = \"woe\")"},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add WoE in a data frame — add_woe","text":".data tbl. data.frame plug new woe version columns. outcome bare name outcome variable. ... Bare names predictor variables, passed pass variables dplyr::select(). means can use helpers like starts_with() matches(). dictionary tbl. NULL function build dictionary variables passed .... can pass custom dictionary , see dictionary() details. prefix character string prefix resulting new variables.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add WoE in a data frame — add_woe","text":"tibble original columns .data plus woe columns wanted.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Add WoE in a data frame — add_woe","text":"can pass custom dictionary add_woe(). must exactly structure output dictionary(). One easy way tweak output returned .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/add_woe.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add WoE in a data frame — add_woe","text":"","code":"mtcars %>% add_woe(\"am\", cyl, gear:carb) #> # A tibble: 32 × 14 #> mpg cyl disp hp drat wt qsec vs am gear carb #> #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 #> 5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 #> 6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 #> 7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 #> 8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 #> 9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 #> 10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 #> # ℹ 22 more rows #> # ℹ 3 more variables: woe_cyl , woe_gear , woe_carb "},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":null,"dir":"Reference","previous_headings":"","what":"Weight of evidence dictionary — dictionary","title":"Weight of evidence dictionary — dictionary","text":"Builds woe dictionary set predictor variables upon given binary outcome. Convenient make woe version given set predictor variables also allow one tweak woe values hand.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Weight of evidence dictionary — dictionary","text":"","code":"dictionary(.data, outcome, ..., Laplace = 1e-06)"},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Weight of evidence dictionary — dictionary","text":".data tbl. data.frame variables come . outcome bare name outcome variable exactly 2 distinct values. ... bare names predictor variables selectors accepted dplyr::select(). Laplace Default 1e-6. pseudocount parameter Laplace Smoothing estimator. Value avoid -Inf/Inf predictor category one outcome class. Set 0 allow Inf/-Inf.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Weight of evidence dictionary — dictionary","text":"tibble summaries woe every given predictor variable stacked .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Weight of evidence dictionary — dictionary","text":"can pass custom dictionary step_woe(). must exactly structure output dictionary(). One easy way tweaking output returned .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Weight of evidence dictionary — dictionary","text":"Kullback, S. (1959). Information Theory Statistics. Wiley, New York. Hastie, T., Tibshirani, R. Friedman, J. (1986). Elements Statistical Learning, Second Edition, Springer, 2009. Good, . J. (1985), \"Weight evidence: brief survey\", Bayesian Statistics, 2, pp.249-270.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/dictionary.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Weight of evidence dictionary — dictionary","text":"","code":"mtcars %>% dictionary(\"am\", cyl, gear:carb) #> # A tibble: 12 × 9 #> variable predictor n_tot n_0 n_1 p_0 p_1 woe outcome #> #> 1 cyl 4 11 3 8 0.158 0.615 1.36 am #> 2 cyl 6 7 4 3 0.211 0.231 0.0918 am #> 3 cyl 8 14 12 2 0.632 0.154 -1.41 am #> 4 gear 3 15 15 0 0.789 0 -16.1 am #> 5 gear 4 12 4 8 0.211 0.615 1.07 am #> 6 gear 5 5 0 5 0 0.385 15.8 am #> 7 carb 1 7 3 4 0.158 0.308 0.667 am #> 8 carb 2 10 6 4 0.316 0.308 -0.0260 am #> 9 carb 3 3 3 0 0.158 0 -14.5 am #> 10 carb 4 10 7 3 0.368 0.231 -0.468 am #> 11 carb 6 1 0 1 0 0.0769 14.2 am #> 12 carb 8 1 0 1 0 0.0769 14.2 am"},{"path":"https://embed.tidymodels.org/dev/reference/embed-package.html","id":null,"dir":"Reference","previous_headings":"","what":"embed: Extra Recipes for Encoding Predictors — embed-package","title":"embed: Extra Recipes for Encoding Predictors — embed-package","text":"Predictors can converted one numeric representations using variety methods. Effect encodings using simple generalized linear models arXiv:1611.09477 nonlinear models arXiv:1604.06737 can used. also functions dimension reduction approaches.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/embed-package.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"embed: Extra Recipes for Encoding Predictors — embed-package","text":"Maintainer: Emil Hvitfeldt emil.hvitfeldt@posit.co (ORCID) Authors: Max Kuhn max@posit.co (ORCID) contributors: Posit Software, PBC [copyright holder, funder]","code":""},{"path":"https://embed.tidymodels.org/dev/reference/reexports.html","id":null,"dir":"Reference","previous_headings":"","what":"Objects exported from other packages — reexports","title":"Objects exported from other packages — reexports","text":"objects imported packages. Follow links see documentation. generics required_pkgs, tidy, tunable","code":""},{"path":"https://embed.tidymodels.org/dev/reference/required_pkgs.embed.html","id":null,"dir":"Reference","previous_headings":"","what":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","title":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","text":"Recipe-adjacent packages always list required package steps can function properly within parallel processing schemes.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/required_pkgs.embed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","text":"","code":"# S3 method for class 'step_collapse_cart' required_pkgs(x, ...) # S3 method for class 'step_collapse_stringdist' required_pkgs(x, ...) # S3 method for class 'step_discretize_cart' required_pkgs(x, ...) # S3 method for class 'step_discretize_xgb' required_pkgs(x, ...) # S3 method for class 'step_embed' required_pkgs(x, ...) # S3 method for class 'step_feature_hash' required_pkgs(x, ...) # S3 method for class 'step_lencode_bayes' required_pkgs(x, ...) # S3 method for class 'step_lencode_glm' required_pkgs(x, ...) # S3 method for class 'step_lencode_mixed' required_pkgs(x, ...) # S3 method for class 'step_pca_sparse' required_pkgs(x, ...) # S3 method for class 'step_pca_sparse_bayes' required_pkgs(x, ...) # S3 method for class 'step_pca_truncated' required_pkgs(x, ...) # S3 method for class 'step_umap' required_pkgs(x, ...) # S3 method for class 'step_woe' required_pkgs(x, ...)"},{"path":"https://embed.tidymodels.org/dev/reference/required_pkgs.embed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","text":"x recipe step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/required_pkgs.embed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"S3 methods for tracking which additional packages are needed for steps. — required_pkgs.step_collapse_cart","text":"character vector","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":null,"dir":"Reference","previous_headings":"","what":"Compound solubility data — solubility","title":"Compound solubility data — solubility","text":"Compound solubility data","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Compound solubility data — solubility","text":"Tetko, ., Tanchuk, V., Kasheva, T., Villa, . (2001). Estimation aqueous solubility chemical compounds using E-state indices. Journal Chemical Information Computer Sciences, 41(6), 1488-1493. Huuskonen, J. (2000). Estimation aqueous solubility diverse set organic compounds based molecular topology. Journal Chemical Information Computer Sciences, 40(3), 773-777.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Compound solubility data — solubility","text":"solubility data frame","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Compound solubility data — solubility","text":"Tetko et al. (2001) Huuskonen (2000) investigated set compounds corresponding experimental solubility values using complex sets descriptors. used linear regression neural network models estimate relationship chemical structure solubility. analyses, use 1267 compounds set understandable descriptors fall one three groups: 208 binary \"fingerprints\" indicate presence absence particular chemical sub-structure, 16 count descriptors (number bonds number Bromine atoms) 4 continuous descriptors (molecular weight surface area).","code":""},{"path":"https://embed.tidymodels.org/dev/reference/solubility.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Compound solubility data — solubility","text":"","code":"data(solubility) str(solubility) #> tibble [1,267 × 229] (S3: tbl_df/tbl/data.frame) #> $ fp_001 : int [1:1267] 0 0 1 0 0 1 0 1 1 1 ... #> $ fp_002 : int [1:1267] 1 1 1 0 0 0 1 0 0 1 ... #> $ fp_003 : int [1:1267] 0 0 1 1 1 1 0 1 1 1 ... #> $ fp_004 : int [1:1267] 0 1 1 0 1 1 1 1 1 1 ... #> $ fp_005 : int [1:1267] 1 1 1 0 1 0 1 0 0 1 ... #> $ fp_006 : int [1:1267] 0 1 0 0 1 0 0 0 1 1 ... #> $ fp_007 : int [1:1267] 0 1 0 1 0 0 0 1 1 1 ... #> $ fp_008 : int [1:1267] 1 1 1 0 0 0 1 0 0 0 ... #> $ fp_009 : int [1:1267] 0 0 0 0 1 1 1 0 1 0 ... #> $ fp_010 : int [1:1267] 0 0 1 0 0 0 0 0 0 0 ... #> $ fp_011 : int [1:1267] 0 1 0 0 0 0 0 0 1 0 ... #> $ fp_012 : int [1:1267] 0 0 0 0 0 1 0 1 0 0 ... #> $ fp_013 : int [1:1267] 0 0 0 0 1 0 1 0 0 0 ... #> $ fp_014 : int [1:1267] 0 0 0 0 0 0 1 0 0 0 ... #> $ fp_015 : int [1:1267] 1 1 1 1 1 1 1 1 1 1 ... #> $ fp_016 : int [1:1267] 0 1 0 0 1 1 0 1 0 0 ... #> $ fp_017 : int [1:1267] 0 0 1 1 0 0 0 0 1 1 ... #> $ fp_018 : int [1:1267] 0 1 0 0 0 0 0 0 0 0 ... #> $ fp_019 : int [1:1267] 1 0 0 0 1 0 1 0 0 0 ... #> $ fp_020 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_021 : int [1:1267] 0 0 0 0 0 1 0 0 1 0 ... #> $ fp_022 : int [1:1267] 0 0 0 0 0 0 0 0 0 1 ... #> $ fp_023 : int [1:1267] 0 0 0 1 0 0 0 0 1 0 ... #> $ fp_024 : int [1:1267] 1 0 0 0 1 0 0 0 0 0 ... #> $ fp_025 : int [1:1267] 0 0 1 0 0 0 0 0 0 0 ... #> $ fp_026 : int [1:1267] 1 0 0 0 0 0 1 0 0 0 ... #> $ fp_027 : int [1:1267] 0 0 0 0 0 0 0 0 0 1 ... #> $ fp_028 : int [1:1267] 0 1 0 0 0 0 0 0 1 1 ... #> $ fp_029 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_030 : int [1:1267] 0 0 0 0 1 0 0 0 0 0 ... #> $ fp_031 : int [1:1267] 0 0 0 0 0 0 0 1 0 0 ... #> $ fp_032 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_033 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_034 : int [1:1267] 0 0 0 0 1 0 0 0 0 1 ... #> $ fp_035 : int [1:1267] 0 0 0 0 0 0 0 0 1 0 ... #> $ fp_036 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_037 : int [1:1267] 0 0 0 0 0 0 0 0 1 0 ... #> $ fp_038 : int [1:1267] 0 0 1 0 0 0 0 0 0 0 ... #> $ fp_039 : int [1:1267] 1 0 0 0 0 0 0 0 0 0 ... #> $ fp_040 : int [1:1267] 1 0 0 0 0 0 0 0 0 0 ... #> $ fp_041 : int [1:1267] 0 0 0 1 0 0 0 0 1 0 ... #> $ fp_042 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_043 : int [1:1267] 0 1 0 0 0 0 0 0 0 0 ... #> $ fp_044 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_045 : int [1:1267] 0 0 1 0 0 0 0 0 0 0 ... #> $ fp_046 : int [1:1267] 0 1 0 0 0 0 1 0 0 1 ... #> $ fp_047 : int [1:1267] 0 1 1 0 0 0 1 0 0 0 ... #> $ fp_048 : int [1:1267] 0 0 0 0 0 0 0 1 0 0 ... #> $ fp_049 : int [1:1267] 0 0 0 0 0 0 1 0 0 0 ... #> $ fp_050 : int [1:1267] 0 0 0 0 0 0 0 1 0 1 ... #> $ fp_051 : int [1:1267] 0 1 0 0 0 0 0 0 0 0 ... #> $ fp_052 : int [1:1267] 0 0 0 0 0 0 0 0 0 1 ... #> $ fp_053 : int [1:1267] 0 0 0 0 0 0 1 0 0 0 ... #> $ fp_054 : int [1:1267] 0 0 0 1 0 0 0 0 1 1 ... #> $ fp_055 : int [1:1267] 0 0 0 0 0 0 0 0 0 0 ... #> $ fp_056 : int [1:1267] 1 0 0 0 0 0 0 0 0 0 ... #> $ fp_057 : int [1:1267] 0 0 0 0 0 0 1 0 0 0 ... #> $ fp_058 : int [1:1267] 0 0 0 0 0 0 0 0 0 1 ... #> $ fp_059 : int [1:1267] 0 0 0 0 0 0 0 1 0 0 ... #> $ fp_060 : int [1:1267] 0 1 1 0 0 0 0 1 1 0 ... #> $ fp_061 : int [1:1267] 0 0 1 0 0 0 0 1 1 0 ... #> $ fp_062 : int [1:1267] 0 0 1 0 0 1 0 1 1 1 ... #> $ fp_063 : int [1:1267] 1 1 0 0 1 1 1 0 0 1 ... #> $ fp_064 : int [1:1267] 0 1 1 0 1 1 0 1 0 0 ... #> $ fp_065 : int [1:1267] 1 1 0 0 1 0 1 0 1 1 ... #> $ fp_066 : int [1:1267] 1 0 1 1 1 1 1 1 1 1 ... #> $ fp_067 : int [1:1267] 1 1 0 0 1 1 1 0 0 1 ... #> $ fp_068 : int [1:1267] 0 1 0 0 1 1 1 0 0 1 ... #> $ fp_069 : int [1:1267] 1 0 1 1 1 1 0 1 1 0 ... #> $ fp_070 : int [1:1267] 1 1 0 1 0 0 1 0 1 0 ... #> $ fp_071 : int [1:1267] 0 0 0 0 0 0 1 0 1 1 ... #> $ fp_072 : int [1:1267] 0 1 1 0 0 1 0 1 1 1 ... #> $ fp_073 : int [1:1267] 0 1 1 0 0 0 0 0 1 0 ... #> $ fp_074 : int [1:1267] 0 1 0 0 0 0 0 0 1 0 ... #> $ fp_075 : int [1:1267] 0 1 0 0 1 1 1 0 0 1 ... #> $ fp_076 : int [1:1267] 1 1 0 0 0 0 1 0 1 1 ... #> $ fp_077 : int [1:1267] 0 1 0 1 0 0 0 1 1 1 ... #> $ fp_078 : int [1:1267] 0 1 0 0 0 0 0 0 1 0 ... #> $ fp_079 : int [1:1267] 1 1 1 1 1 0 1 0 1 1 ... #> $ fp_080 : int [1:1267] 0 1 0 0 1 1 1 1 0 0 ... #> $ fp_081 : int [1:1267] 0 0 1 1 0 0 0 1 1 1 ... #> $ fp_082 : int [1:1267] 1 1 1 0 1 1 1 0 1 1 ... #> $ fp_083 : int [1:1267] 0 0 0 0 1 0 0 0 0 1 ... #> $ fp_084 : int [1:1267] 1 1 0 0 1 0 1 0 0 0 ... #> $ fp_085 : int [1:1267] 0 1 0 0 0 0 1 0 0 0 ... #> $ fp_086 : int [1:1267] 0 0 0 1 1 0 0 1 1 1 ... #> $ fp_087 : int [1:1267] 1 1 1 1 1 0 1 0 1 1 ... #> $ fp_088 : int [1:1267] 0 1 0 0 0 0 0 1 1 0 ... #> $ fp_089 : int [1:1267] 1 1 0 0 0 0 1 0 0 0 ... #> $ fp_090 : int [1:1267] 0 1 0 1 0 0 0 1 1 1 ... #> $ fp_091 : int [1:1267] 1 1 0 0 1 0 1 0 0 1 ... #> $ fp_092 : int [1:1267] 0 0 0 0 1 1 1 0 1 0 ... #> $ fp_093 : int [1:1267] 0 1 0 1 0 0 0 1 1 1 ... #> $ fp_094 : int [1:1267] 0 0 0 0 1 0 0 1 0 0 ... #> $ fp_095 : int [1:1267] 0 0 0 0 0 0 0 0 1 1 ... #> $ fp_096 : int [1:1267] 0 0 0 0 0 0 0 0 1 0 ... #> $ fp_097 : int [1:1267] 1 1 0 0 0 0 1 0 1 0 ... #> $ fp_098 : int [1:1267] 0 0 1 0 0 0 0 1 0 0 ... #> $ fp_099 : int [1:1267] 0 0 0 0 0 0 0 0 1 0 ... #> [list output truncated]"},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised Collapsing of Factor Levels — step_collapse_cart","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"step_collapse_cart() creates specification recipe step can collapse factor levels smaller set using supervised tree.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"","code":"step_collapse_cart( recipe, ..., role = NA, trained = FALSE, outcome = NULL, cost_complexity = 1e-04, min_n = 5, results = NULL, skip = FALSE, id = rand_id(\"step_collapse_cart\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables affected step. See selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome train CART models order pool factor levels. cost_complexity non-negative value regulates complexity tree pruning occurs. Values near 0.1 usually correspond tree single splits. Values zero correspond unpruned tree. min_n integer many data points required make splits tree growing process. Larger values correspond less complex trees. results list results convert new factor levels. skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"updated recipe step.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"step uses CART tree (classification regression) group existing factor levels potentially smaller set. changes levels factor predictor (tidy() method can used understand translation). different ways step able collapse levels. model fails , results level split, original factor levels retained. also cases \"admissible split\" means model find signal data.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"tidy() step, tibble retruned columns terms, old, new, id: terms character, selectors variables selected old character, old levels new character, new levels id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_cart.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised Collapsing of Factor Levels — step_collapse_cart","text":"","code":"data(ames, package = \"modeldata\") ames$Sale_Price <- log10(ames$Sale_Price) rec <- recipe(Sale_Price ~ ., data = ames) %>% step_collapse_cart( Sale_Type, Garage_Type, Neighborhood, outcome = vars(Sale_Price) ) %>% prep() tidy(rec, number = 1) #> # A tibble: 45 × 4 #> terms old new id #> #> 1 Sale_Type \"ConLD\" Sale_Type_1 step_collapse_cart_SwlKL #> 2 Sale_Type \"ConLw\" Sale_Type_1 step_collapse_cart_SwlKL #> 3 Sale_Type \"Oth\" Sale_Type_1 step_collapse_cart_SwlKL #> 4 Sale_Type \"COD\" Sale_Type_2 step_collapse_cart_SwlKL #> 5 Sale_Type \"VWD\" Sale_Type_2 step_collapse_cart_SwlKL #> 6 Sale_Type \"ConLI\" Sale_Type_3 step_collapse_cart_SwlKL #> 7 Sale_Type \"WD \" Sale_Type_4 step_collapse_cart_SwlKL #> 8 Sale_Type \"CWD\" Sale_Type_5 step_collapse_cart_SwlKL #> 9 Sale_Type \"Con\" Sale_Type_6 step_collapse_cart_SwlKL #> 10 Sale_Type \"New\" Sale_Type_7 step_collapse_cart_SwlKL #> # ℹ 35 more rows"},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":null,"dir":"Reference","previous_headings":"","what":"collapse factor levels using stringdist — step_collapse_stringdist","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"step_collapse_stringdist() creates specification recipe step collapse factor levels low stringdist .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"","code":"step_collapse_stringdist( recipe, ..., role = NA, trained = FALSE, distance = NULL, method = \"osa\", options = list(), results = NULL, columns = NULL, skip = FALSE, id = rand_id(\"collapse_stringdist\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables affected step. See selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. distance Integer, value determine strings collapsed . value used inclusive, 2 collapse levels string distance 2 lower. method Character, method distance calculation. default \"osa\", see stringdist::stringdist-metrics. options List, arguments passed stringdist::stringdistmatrix() weight, q, p, bt, used different values method. results list denoting way labels collapses stored preprocessing step trained prep(). columns character string variable names populated (eventually) terms argument. skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (columns affected) base.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"tidy() step, tibble retruned columns terms, , , id: terms character, selectors variables selected character, old levels character, new levels id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_collapse_stringdist.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"collapse factor levels using stringdist — step_collapse_stringdist","text":"","code":"library(recipes) library(tibble) data0 <- tibble( x1 = c(\"a\", \"b\", \"d\", \"e\", \"sfgsfgsd\", \"hjhgfgjgr\"), x2 = c(\"ak\", \"b\", \"djj\", \"e\", \"hjhgfgjgr\", \"hjhgfgjgr\") ) rec <- recipe(~., data = data0) %>% step_collapse_stringdist(all_predictors(), distance = 1) %>% prep() rec %>% bake(new_data = NULL) #> # A tibble: 6 × 2 #> x1 x2 #> #> 1 a ak #> 2 a b #> 3 a djj #> 4 a b #> 5 sfgsfgsd hjhgfgjgr #> 6 hjhgfgjgr hjhgfgjgr tidy(rec, 1) #> # A tibble: 11 × 4 #> terms from to id #> #> 1 x1 a a collapse_stringdist_qIMPV #> 2 x1 b a collapse_stringdist_qIMPV #> 3 x1 d a collapse_stringdist_qIMPV #> 4 x1 e a collapse_stringdist_qIMPV #> 5 x1 hjhgfgjgr hjhgfgjgr collapse_stringdist_qIMPV #> 6 x1 sfgsfgsd sfgsfgsd collapse_stringdist_qIMPV #> 7 x2 ak ak collapse_stringdist_qIMPV #> 8 x2 b b collapse_stringdist_qIMPV #> 9 x2 e b collapse_stringdist_qIMPV #> 10 x2 djj djj collapse_stringdist_qIMPV #> 11 x2 hjhgfgjgr hjhgfgjgr collapse_stringdist_qIMPV rec <- recipe(~., data = data0) %>% step_collapse_stringdist(all_predictors(), distance = 2) %>% prep() rec %>% bake(new_data = NULL) #> # A tibble: 6 × 2 #> x1 x2 #> #> 1 a ak #> 2 a ak #> 3 a djj #> 4 a ak #> 5 sfgsfgsd hjhgfgjgr #> 6 hjhgfgjgr hjhgfgjgr tidy(rec, 1) #> # A tibble: 11 × 4 #> terms from to id #> #> 1 x1 a a collapse_stringdist_fLSSY #> 2 x1 b a collapse_stringdist_fLSSY #> 3 x1 d a collapse_stringdist_fLSSY #> 4 x1 e a collapse_stringdist_fLSSY #> 5 x1 hjhgfgjgr hjhgfgjgr collapse_stringdist_fLSSY #> 6 x1 sfgsfgsd sfgsfgsd collapse_stringdist_fLSSY #> 7 x2 ak ak collapse_stringdist_fLSSY #> 8 x2 b ak collapse_stringdist_fLSSY #> 9 x2 e ak collapse_stringdist_fLSSY #> 10 x2 djj djj collapse_stringdist_fLSSY #> 11 x2 hjhgfgjgr hjhgfgjgr collapse_stringdist_fLSSY"},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":null,"dir":"Reference","previous_headings":"","what":"Discretize numeric variables with CART — step_discretize_cart","title":"Discretize numeric variables with CART — step_discretize_cart","text":"step_discretize_cart() creates specification recipe step discretize numeric data (e.g. integers doubles) bins supervised way using CART model.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Discretize numeric variables with CART — step_discretize_cart","text":"","code":"step_discretize_cart( recipe, ..., role = NA, trained = FALSE, outcome = NULL, cost_complexity = 0.01, tree_depth = 10, min_n = 20, rules = NULL, skip = FALSE, id = rand_id(\"discretize_cart\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Discretize numeric variables with CART — step_discretize_cart","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables affected step. See selections() details. role Defaults \"predictor\". trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome train CART models order discretize explanatory variables. cost_complexity regularization parameter. split decrease overall lack fit factor cost_complexity attempted. Corresponds cp rpart::rpart(). Defaults 0.01. tree_depth maximum depth final tree. Corresponds maxdepth rpart::rpart(). Defaults 10. min_n number data points node required continue splitting. Corresponds minsplit rpart::rpart(). Defaults 20. rules splitting rules best CART tree retain variable. length zero, splitting used column. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Discretize numeric variables with CART — step_discretize_cart","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Discretize numeric variables with CART — step_discretize_cart","text":"step_discretize_cart() creates non-uniform bins numerical variables utilizing information outcome variable applying CART model. best selection buckets variable selected using standard cost-complexity pruning CART, makes discretization method resistant overfitting. step requires rpart package. installed, step stop note installing package. Note original data replaced new bins.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Discretize numeric variables with CART — step_discretize_cart","text":"tidy() step, tibble retruned columns terms, value, id: terms character, selectors variables selected value numeric, location splits id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Discretize numeric variables with CART — step_discretize_cart","text":"step 3 tuning parameters: cost_complexity: Cost-Complexity Parameter (type: double, default: 0.01) tree_depth: Tree Depth (type: integer, default: 10) min_n: Minimal Node Size (type: integer, default: 20)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Discretize numeric variables with CART — step_discretize_cart","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_cart.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Discretize numeric variables with CART — step_discretize_cart","text":"","code":"library(modeldata) data(ad_data) library(rsample) split <- initial_split(ad_data, strata = \"Class\") ad_data_tr <- training(split) ad_data_te <- testing(split) cart_rec <- recipe(Class ~ ., data = ad_data_tr) %>% step_discretize_cart( tau, age, p_tau, Ab_42, outcome = \"Class\", id = \"cart splits\" ) cart_rec <- prep(cart_rec, training = ad_data_tr) # The splits: tidy(cart_rec, id = \"cart splits\") #> # A tibble: 16 × 3 #> terms value id #> #> 1 tau 5.74 cart splits #> 2 tau 5.79 cart splits #> 3 tau 5.89 cart splits #> 4 tau 6.00 cart splits #> 5 tau 6.15 cart splits #> 6 tau 6.25 cart splits #> 7 tau 6.30 cart splits #> 8 tau 6.42 cart splits #> 9 tau 6.66 cart splits #> 10 age 0.986 cart splits #> 11 age 0.987 cart splits #> 12 p_tau 3.90 cart splits #> 13 p_tau 4.62 cart splits #> 14 Ab_42 10.6 cart splits #> 15 Ab_42 11.2 cart splits #> 16 Ab_42 11.3 cart splits bake(cart_rec, ad_data_te, tau) #> # A tibble: 84 × 1 #> tau #> #> 1 [-Inf,5.744) #> 2 [6.147,6.249) #> 3 [6.664, Inf] #> 4 [5.995,6.147) #> 5 [-Inf,5.744) #> 6 [-Inf,5.744) #> 7 [6.422,6.664) #> 8 [6.147,6.249) #> 9 [6.304,6.422) #> 10 [-Inf,5.744) #> # ℹ 74 more rows"},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":null,"dir":"Reference","previous_headings":"","what":"Discretize numeric variables with XgBoost — step_discretize_xgb","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"step_discretize_xgb() creates specification recipe step discretize numeric data (e.g. integers doubles) bins supervised way using XgBoost model.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"","code":"step_discretize_xgb( recipe, ..., role = NA, trained = FALSE, outcome = NULL, sample_val = 0.2, learn_rate = 0.3, num_breaks = 10, tree_depth = 1, min_n = 5, rules = NULL, skip = FALSE, id = rand_id(\"discretize_xgb\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables affected step. See selections() details. role Defaults \"predictor\". trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome train XgBoost models order discretize explanatory variables. sample_val Share data used validation (early stopping) learned splits (rest used training). Defaults 0.20. learn_rate rate boosting algorithm adapts iteration--iteration. Corresponds eta xgboost package. Defaults 0.3. num_breaks maximum number discrete bins bucket continuous features. Corresponds max_bin xgboost package. Defaults 10. tree_depth maximum depth tree (.e. number splits). Corresponds max_depth xgboost package. Defaults 1. min_n minimum number instances needed node. Corresponds min_child_weight xgboost package. Defaults 5. rules splitting rules best XgBoost tree retain variable. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"step_discretize_xgb() creates non-uniform bins numerical variables utilizing information outcome variable applying xgboost model. advised impute missing values step. step intended used particularly linear models thanks creating non-uniform bins becomes easier learn non-linear patterns data. best selection buckets variable selected using internal early stopping scheme implemented xgboost package, makes discretization method prone overfitting. pre-defined values underlying xgboost learns good reasonably complex results. However, one wishes tune recommended path first start changing value num_breaks e.g.: 20 30. give satisfactory results one experiment modifying tree_depth min_n parameters. Note recommended tune learn_rate simultaneously parameters. step requires xgboost package. installed, step stop note installing package. Note original data replaced new bins.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"tidy() step, tibble retruned columns terms, value, id: terms character, selectors variables selected value numeric, location splits id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"step 5 tuning parameters: sample_val: Proportion data validation (type: double, default: 0.2) learn_rate: Learning Rate (type: double, default: 0.3) num_breaks: Number Cut Points (type: integer, default: 10) tree_depth: Tree Depth (type: integer, default: 1) min_n: Minimal Node Size (type: integer, default: 5)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_discretize_xgb.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Discretize numeric variables with XgBoost — step_discretize_xgb","text":"","code":"library(rsample) library(recipes) data(credit_data, package = \"modeldata\") set.seed(1234) split <- initial_split(credit_data[1:1000, ], strata = \"Status\") credit_data_tr <- training(split) credit_data_te <- testing(split) xgb_rec <- recipe(Status ~ Income + Assets, data = credit_data_tr) %>% step_impute_median(Income, Assets) %>% step_discretize_xgb(Income, Assets, outcome = \"Status\") xgb_rec <- prep(xgb_rec, training = credit_data_tr) bake(xgb_rec, credit_data_te, Assets) #> # A tibble: 251 × 1 #> Assets #> #> 1 [3000,4000) #> 2 [3000,4000) #> 3 [9500, Inf] #> 4 [3000,4000) #> 5 [-Inf,2500) #> 6 [-Inf,2500) #> 7 [-Inf,2500) #> 8 [4000,4500) #> 9 [-Inf,2500) #> 10 [3000,4000) #> # ℹ 241 more rows"},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":null,"dir":"Reference","previous_headings":"","what":"Encoding Factors into Multiple Columns — step_embed","title":"Encoding Factors into Multiple Columns — step_embed","text":"step_embed() creates specification recipe step convert nominal (.e. factor) predictor set scores derived tensorflow model via word-embedding model. embed_control simple wrapper setting default options.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Encoding Factors into Multiple Columns — step_embed","text":"","code":"step_embed( recipe, ..., role = \"predictor\", trained = FALSE, outcome = NULL, predictors = NULL, num_terms = 2, hidden_units = 0, options = embed_control(), mapping = NULL, history = NULL, keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"embed\") ) embed_control( loss = \"mse\", metrics = NULL, optimizer = \"sgd\", epochs = 20, validation_split = 0, batch_size = 32, verbose = 0, callbacks = NULL )"},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Encoding Factors into Multiple Columns — step_embed","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables. step_embed, indicates variables encoded numeric format. See recipes::selections() details. tidy method, currently used. role model terms created step, analysis role assigned?. default, function assumes embedding variables created used predictors model. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome neural network. predictors optional call vars specify variables added additional predictors neural network. variables numeric perhaps centered scaled. num_terms integer number resulting variables. hidden_units integer number hidden units dense ReLu layer embedding output later. Use value zero intermediate layer (see Details ). options list options model fitting process. mapping list tibble results define encoding. NULL step trained recipes::prep(). history tibble convergence statistics term. NULL step trained recipes::prep(). keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify . optimizer, loss, metrics Arguments pass keras::compile() epochs, validation_split, batch_size, verbose, callbacks Arguments pass keras::fit()","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Encoding Factors into Multiple Columns — step_embed","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables encoding), level (factor levels), several columns containing embed name.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Encoding Factors into Multiple Columns — step_embed","text":"Factor levels initially assigned random new variables variables used neural network optimize allocation levels new columns well estimating model predict outcome. See Section 6.1.2 Francois Allaire (2018) details. new variables mapped specific levels seen time model training extra instance variables used new levels factor. One model created call step_embed. terms given step estimated encoded model also contain predictors give predictors (). outcome numeric, linear activation function used last layer softmax used factor outcomes (number levels). example, keras code numeric outcome, one categorical predictor, hidden units used factor outcome used hidden units requested, code variables specified predictors added additional dense layer layer_flatten hidden layer. Also note may difficult obtain reproducible results using step due nature Tensorflow (see link References). tensorflow models run parallel within session (via foreach futures) parallel package. using recipes step caret, avoid parallel processing.","code":"keras_model_sequential() %>% layer_embedding( input_dim = num_factor_levels_x + 1, output_dim = num_terms, input_length = 1 ) %>% layer_flatten() %>% layer_dense(units = 1, activation = 'linear') keras_model_sequential() %>% layer_embedding( input_dim = num_factor_levels_x + 1, output_dim = num_terms, input_length = 1 ) %>% layer_flatten() %>% layer_dense(units = hidden_units, activation = \"relu\") %>% layer_dense(units = num_factor_levels_y, activation = 'softmax')"},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Encoding Factors into Multiple Columns — step_embed","text":"tidy() step, tibble retruned number columns embedding information, columns terms, levels, id: terms character, selectors variables selected levels character, levels variable id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Encoding Factors into Multiple Columns — step_embed","text":"step 2 tuning parameters: num_terms: # Model Terms (type: integer, default: 2) hidden_units: # Hidden Units (type: integer, default: 0)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Encoding Factors into Multiple Columns — step_embed","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Encoding Factors into Multiple Columns — step_embed","text":"Francois C Allaire JJ (2018) Deep Learning R, Manning \"Concatenate Embeddings Categorical Variables Keras\" https://flovv.github.io/Embeddings_with_keras_part2/","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_embed.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Encoding Factors into Multiple Columns — step_embed","text":"","code":"data(grants, package = \"modeldata\") set.seed(1) grants_other <- sample_n(grants_other, 500) rec <- recipe(class ~ num_ci + sponsor_code, data = grants_other) %>% step_embed(sponsor_code, outcome = vars(class), options = embed_control(epochs = 10) )"},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":null,"dir":"Reference","previous_headings":"","what":"Dummy Variables Creation via Feature Hashing — step_feature_hash","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"step_feature_hash() deprecated favor textrecipes::step_dummy_hash(). function creates specification recipe step convert nominal data (e.g. character factors) one numeric binary columns using levels original data.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"","code":"step_feature_hash( recipe, ..., role = \"predictor\", trained = FALSE, num_hash = 2^6, preserve = deprecated(), columns = NULL, keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"feature_hash\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables step. See selections() details. role model terms created step, analysis role assigned? default, new columns created step original variables used predictors model. trained logical indicate quantities preprocessing estimated. num_hash number resulting dummy variable columns. preserve Use keep_original_cols instead specify whether selected column(s) retained addition new dummy variables. columns character vector selected columns. NULL step trained recipes::prep(). keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"step_feature_hash() create set binary dummy variables factor character variable. values used determine row dummy variable assigned (opposed specific column value map ). Since method rely pre-determined assignment levels columns, new factor levels can added selected columns without issue. Missing values result missing values hashed columns. Note assignment levels hashing columns try maximize allocation. likely multiple levels column map hashed columns (even small data sets). Similarly, likely columns zeros. zero-variance filter (via recipes::step_zv()) recommended recipe uses hashed columns.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"tidy() step, tibble retruned columns terms id: terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"Weinberger, K, Dasgupta, J Langford, Smola, J Attenberg. 2009. \"Feature Hashing Large Scale Multitask Learning.\" Proceedings 26th Annual International Conference Machine Learning, 1113–20. ACM. Kuhn Johnson (2020) Feature Engineering Selection: Practical Approach Predictive Models. CRC/Chapman Hall https://bookdown.org/max/FES/encoding-predictors--many-categories.html","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_feature_hash.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Dummy Variables Creation via Feature Hashing — step_feature_hash","text":"","code":"data(grants, package = \"modeldata\") rec <- recipe(class ~ sponsor_code, data = grants_other) %>% step_feature_hash( sponsor_code, num_hash = 2^6, keep_original_cols = TRUE ) %>% prep() #> Warning: `step_feature_hash()` was deprecated in embed 0.2.0. #> ℹ Please use `textrecipes::step_dummy_hash()` instead. # How many of the 298 locations ended up in each hash column? results <- bake(rec, new_data = NULL, starts_with(\"sponsor_code\")) %>% distinct() apply(results %>% select(-sponsor_code), 2, sum) %>% table() #> . #> 0 1 2 3 4 5 6 7 8 9 10 #> 2 3 4 15 11 6 12 5 2 2 2"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"step_lencode_bayes() creates specification recipe step convert nominal (.e. factor) predictor single set scores derived generalized linear model estimated using Bayesian analysis.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"","code":"step_lencode_bayes( recipe, ..., role = NA, trained = FALSE, outcome = NULL, options = list(seed = sample.int(10^5, 1)), verbose = FALSE, mapping = NULL, skip = FALSE, id = rand_id(\"lencode_bayes\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables. step_lencode_bayes, indicates variables encoded numeric format. See recipes::selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome generalized linear model. numeric two-level factors currently supported. options list options pass rstanarm::stan_glmer(). verbose logical control default printing rstanarm::stan_glmer(). mapping list tibble results define encoding. NULL step trained recipes::prep(). skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables encoding), level (factor levels), value (encodings).","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"factor predictor, generalized linear model fit outcome coefficients returned encoding. coefficients linear predictor scale , factor outcomes, log-odds units. coefficients created using intercept model , two factor outcomes used, log-odds reflect event interest first level factor. novel levels, slightly timmed average coefficients returned. hierarchical generalized linear model fit using rstanarm::stan_glmer() intercept via ... include family argument (automatically set step, unless passed options) well arguments given options argument step. Relevant options include chains, iter, cores, arguments priors (see links References ). prior_intercept argument effect amount shrinkage.","code":"stan_glmer(outcome ~ (1 | predictor), data = data, ...)"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"tidy() step, tibble retruned columns level, value, terms, id: level character, factor levels value numeric, encoding terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"Micci-Barreca D (2001) \"preprocessing scheme high-cardinality categorical attributes classification prediction problems,\" ACM SIGKDD Explorations Newsletter, 3(1), 27-32. Zumel N Mount J (2017) \"vtreat: data.frame Processor Predictive Modeling,\" arXiv:1611.09477 \"Hierarchical Partial Pooling Repeated Binary Trials\" https://CRAN.R-project.org/package=rstanarm/vignettes/pooling.html \"Prior Distributions rstanarm Models\" http://mc-stan.org/rstanarm/reference/priors.html \"Estimating Generalized (Non-)Linear Models Group-Specific Terms rstanarm\" http://mc-stan.org/rstanarm/articles/glmer.html","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_bayes.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_bayes","text":"","code":"library(recipes) library(dplyr) library(modeldata) data(grants) set.seed(1) grants_other <- sample_n(grants_other, 500) # \\donttest{ reencoded <- recipe(class ~ sponsor_code, data = grants_other) %>% step_lencode_bayes(sponsor_code, outcome = vars(class)) # }"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"step_lencode_glm() creates specification recipe step convert nominal (.e. factor) predictor single set scores derived generalized linear model.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"","code":"step_lencode_glm( recipe, ..., role = NA, trained = FALSE, outcome = NULL, mapping = NULL, skip = FALSE, id = rand_id(\"lencode_glm\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables. step_lencode_glm, indicates variables encoded numeric format. See recipes::selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome generalized linear model. numeric two-level factors currently supported. mapping list tibble results define encoding. NULL step trained recipes::prep(). skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables encoding), level (factor levels), value (encodings).","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"factor predictor, generalized linear model fit outcome coefficients returned encoding. coefficients linear predictor scale , factor outcomes, log-odds units. coefficients created using intercept model , two factor outcomes used, log-odds reflect event interest first level factor. novel levels, slightly timmed average coefficients returned.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"tidy() step, tibble retruned columns level, value, terms, id: level character, factor levels value numeric, encoding terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"Micci-Barreca D (2001) \"preprocessing scheme high-cardinality categorical attributes classification prediction problems,\" ACM SIGKDD Explorations Newsletter, 3(1), 27-32. Zumel N Mount J (2017) \"vtreat: data.frame Processor Predictive Modeling,\" arXiv:1611.09477","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_glm.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised Factor Conversions into Linear Functions using Likelihood Encodings — step_lencode_glm","text":"","code":"library(recipes) library(dplyr) library(modeldata) data(grants) set.seed(1) grants_other <- sample_n(grants_other, 500) # \\donttest{ reencoded <- recipe(class ~ sponsor_code, data = grants_other) %>% step_lencode_glm(sponsor_code, outcome = vars(class)) # }"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"step_lencode_mixed() creates specification recipe step convert nominal (.e. factor) predictor single set scores derived generalized linear mixed model.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"","code":"step_lencode_mixed( recipe, ..., role = NA, trained = FALSE, outcome = NULL, options = list(verbose = 0), mapping = NULL, skip = FALSE, id = rand_id(\"lencode_mixed\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables. step_lencode_mixed, indicates variables encoded numeric format. See recipes::selections() details. tidy method, currently used. role used step since new variables created. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome generalized linear model. numeric two-level factors currently supported. options list options pass lme4::lmer() lme4::glmer(). mapping list tibble results define encoding. NULL step trained recipes::prep(). skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables encoding), level (factor levels), value (encodings).","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"factor predictor, generalized linear model fit outcome coefficients returned encoding. coefficients linear predictor scale , factor outcomes, log-odds units. coefficients created using intercept model , two factor outcomes used, log-odds reflect event interest first level factor. novel levels, slightly timmed average coefficients returned. hierarchical generalized linear model fit using lme4::lmer() lme4::glmer(), depending nature outcome, intercept via ... include family argument (automatically set step) well arguments given options argument step. Relevant options include control others.","code":"lmer(outcome ~ 1 + (1 | predictor), data = data, ...)"},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"tidy() step, tibble retruned columns level, value, terms, id: level character, factor levels value numeric, encoding terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"step performs supervised operation can utilize case weights. use , see documentation recipes::case_weights examples tidymodels.org.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"Micci-Barreca D (2001) \"preprocessing scheme high-cardinality categorical attributes classification prediction problems,\" ACM SIGKDD Explorations Newsletter, 3(1), 27-32. Zumel N Mount J (2017) \"vtreat: data.frame Processor Predictive Modeling,\" arXiv:1611.09477","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_lencode_mixed.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised Factor Conversions into Linear Functions using Bayesian Likelihood Encodings — step_lencode_mixed","text":"","code":"library(recipes) library(dplyr) library(modeldata) data(grants) set.seed(1) grants_other <- sample_n(grants_other, 500) # \\donttest{ reencoded <- recipe(class ~ sponsor_code, data = grants_other) %>% step_lencode_mixed(sponsor_code, outcome = vars(class)) # }"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":null,"dir":"Reference","previous_headings":"","what":"Sparse PCA Signal Extraction — step_pca_sparse","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"step_pca_sparse() creates specification recipe step convert numeric data one principal components can zero coefficients.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"","code":"step_pca_sparse( recipe, ..., role = \"predictor\", trained = FALSE, num_comp = 5, predictor_prop = 1, options = list(), res = NULL, prefix = \"PC\", keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"pca_sparse\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables used compute components. See selections() details. tidy method, currently used. role model terms created step, analysis role assigned? default, function assumes new principal component columns created original variables used predictors model. trained logical indicate quantities preprocessing estimated. num_comp number components retain new predictors. num_comp greater number columns number possible components, smaller value used. num_comp = 0 set transformation done selected variables stay unchanged, regardless value keep_original_cols. predictor_prop maximum number original predictors can non-zero coefficients PCA component (via regularization). options list options default method irlba::ssvd(). res rotation matrix preprocessing step trained prep(). prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables selected), value (loading), component.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"irlba package required step. installed, user prompted step defined. irlba::ssvd() function used encourage sparsity; documentation details method. argument num_comp controls number components retained (original variables used derive components removed data). new components names begin prefix sequence numbers. variable names padded zeros. example, num_comp < 10, names PC1 - PC9. num_comp = 101, names PC1 - PC101.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"tidy() step, tibble retruned columns terms, value, component, id: terms character, selectors variables selected value numeric, variable loading component character, principle component id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"step 2 tuning parameters: num_comp: # Components (type: integer, default: 5) predictor_prop: Proportion Predictors (type: double, default: 1)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"underlying operation allow case weights.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sparse PCA Signal Extraction — step_pca_sparse","text":"","code":"library(recipes) library(ggplot2) data(ad_data, package = \"modeldata\") ad_rec <- recipe(Class ~ ., data = ad_data) %>% step_zv(all_predictors()) %>% step_YeoJohnson(all_numeric_predictors()) %>% step_normalize(all_numeric_predictors()) %>% step_pca_sparse( all_numeric_predictors(), predictor_prop = 0.75, num_comp = 3, id = \"sparse pca\" ) %>% prep() tidy(ad_rec, id = \"sparse pca\") %>% mutate(value = ifelse(value == 0, NA, value)) %>% ggplot(aes(x = component, y = terms, fill = value)) + geom_tile() + scale_fill_gradient2() + theme(axis.text.y = element_blank())"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":null,"dir":"Reference","previous_headings":"","what":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"step_pca_sparse_bayes() creates specification recipe step convert numeric data one principal components can zero coefficients.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"","code":"step_pca_sparse_bayes( recipe, ..., role = \"predictor\", trained = FALSE, num_comp = 5, prior_slab_dispersion = 1, prior_mixture_threshold = 0.1, options = list(), res = NULL, prefix = \"PC\", keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"pca_sparse_bayes\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables used compute components. See selections() details. tidy method, currently used. role model terms created step, analysis role assigned? default, function assumes new principal component columns created original variables used predictors model. trained logical indicate quantities preprocessing estimated. num_comp number components retain new predictors. num_comp greater number columns number possible components, smaller value used. num_comp = 0 set transformation done selected variables stay unchanged, regardless value keep_original_cols. prior_slab_dispersion value proportional dispersion (scale) parameter slab portion prior. Smaller values result increase zero coefficients. prior_mixture_threshold parameter defines trade-spike slab components prior. Increasing parameter increases number zero coefficients. options list options default method VBsparsePCA::VBsparsePCA(). res rotation matrix preprocessing step trained prep(). prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble columns terms (selectors variables selected), value (loading), component.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"VBsparsePCA package required step. installed, user prompted step defined. spike--slab prior mixture two priors. One (\"spike\") mass zero represents variable contribution PCA coefficients. prior broader distribution reflects coefficient distribution variables affect PCA analysis. \"slab\". narrower slab, likely coefficient zero (regularized closer zero). mixture two priors governed mixing parameter, prior distribution hyper-parameter prior. PCA coefficients resulting scores unique sign. step attempt make sign components consistent run--run. However, sparsity constraint may interfere goal. argument num_comp controls number components retained (original variables used derive components removed data). new components names begin prefix sequence numbers. variable names padded zeros. example, num_comp < 10, names PC1 - PC9. num_comp = 101, names PC1 - PC101.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"tidy() step, tibble retruned columns terms, value, component, id: terms character, selectors variables selected value numeric, variable loading component character, principle component id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"step 3 tuning parameters: num_comp: # Components (type: integer, default: 5) prior_slab_dispersion: Dispersion Slab Prior (type: double, default: 1) prior_mixture_threshold: Threshold Mixture Prior (type: double, default: 0.1)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"Ning, B. (2021). Spike slab Bayesian sparse principal component analysis. arXiv:2102.00305.","code":""},{"path":[]},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_sparse_bayes.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sparse Bayesian PCA Signal Extraction — step_pca_sparse_bayes","text":"","code":"library(recipes) library(ggplot2) data(ad_data, package = \"modeldata\") ad_rec <- recipe(Class ~ ., data = ad_data) %>% step_zv(all_predictors()) %>% step_YeoJohnson(all_numeric_predictors()) %>% step_normalize(all_numeric_predictors()) %>% step_pca_sparse_bayes( all_numeric_predictors(), prior_mixture_threshold = 0.95, prior_slab_dispersion = 0.05, num_comp = 3, id = \"sparse bayesian pca\" ) %>% prep() tidy(ad_rec, id = \"sparse bayesian pca\") %>% mutate(value = ifelse(value == 0, NA, value)) %>% ggplot(aes(x = component, y = terms, fill = value)) + geom_tile() + scale_fill_gradient2() + theme(axis.text.y = element_blank())"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":null,"dir":"Reference","previous_headings":"","what":"Truncated PCA Signal Extraction — step_pca_truncated","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"step_pca_truncated() creates specification recipe step convert numeric data one principal components. truncated calculates number components asked instead done recipes::step_pca().","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"","code":"step_pca_truncated( recipe, ..., role = \"predictor\", trained = FALSE, num_comp = 5, options = list(), res = NULL, columns = NULL, prefix = \"PC\", keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"pca_truncated\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables step. See selections() details. role model terms created step, analysis role assigned? default, new columns created step original variables used predictors model. trained logical indicate quantities preprocessing estimated. num_comp number components retain new predictors. num_comp greater number columns number possible components, smaller value used. num_comp = 0 set transformation done selected variables stay unchanged, regardless value keep_original_cols. options list options default method irlba::prcomp_irlba(). Argument defaults set retx = FALSE, center = FALSE, scale. = FALSE, tol = NULL. Note argument x passed (). res irlba::prcomp_irlba() object stored preprocessing step trained prep(). columns character string selected variable names. field placeholder populated prep() used. prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"Principal component analysis (PCA) transformation group variables produces new set artificial features components. components designed capture maximum amount information (.e. variance) original variables. Also, components statistically independent one another. means can used combat large inter-variables correlations data set. advisable standardize variables prior running PCA. , variable centered scaled prior PCA calculation. can changed using options argument using step_center() step_scale(). argument num_comp controls number components retained (original variables used derive components removed data). new components names begin prefix sequence numbers. variable names padded zeros. example, num_comp < 10, names PC1 - PC9. num_comp = 101, names PC1 - PC101.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"tidy() step two things can happen depending type argument. type = \"coef\" tibble returned 4 columns terms, value, component , id: terms character, selectors variables selected value numeric, variable loading component character, principle component id character, id step type = \"variance\" tibble returned 4 columns terms, value, component , id: terms character, type variance value numeric, value variance component integer, principle component id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"step 1 tuning parameters: num_comp: # Components (type: integer, default: 5)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"step performs unsupervised operation can utilize case weights. result, case weights used frequency weights. information, see documentation case_weights examples tidymodels.org.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"Jolliffe, . T. (2010). Principal Component Analysis. Springer.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_pca_truncated.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Truncated PCA Signal Extraction — step_pca_truncated","text":"","code":"rec <- recipe(~., data = mtcars) pca_trans <- rec %>% step_normalize(all_numeric()) %>% step_pca_truncated(all_numeric(), num_comp = 2) pca_estimates <- prep(pca_trans, training = mtcars) pca_data <- bake(pca_estimates, mtcars) rng <- extendrange(c(pca_data$PC1, pca_data$PC2)) plot(pca_data$PC1, pca_data$PC2, xlim = rng, ylim = rng ) tidy(pca_trans, number = 2) #> # A tibble: 1 × 4 #> terms value component id #> #> 1 all_numeric() NA NA pca_truncated_AGa8C tidy(pca_estimates, number = 2) #> # A tibble: 22 × 4 #> terms value component id #> #> 1 mpg 0.363 PC1 pca_truncated_AGa8C #> 2 cyl -0.374 PC1 pca_truncated_AGa8C #> 3 disp -0.368 PC1 pca_truncated_AGa8C #> 4 hp -0.330 PC1 pca_truncated_AGa8C #> 5 drat 0.294 PC1 pca_truncated_AGa8C #> 6 wt -0.346 PC1 pca_truncated_AGa8C #> 7 qsec 0.200 PC1 pca_truncated_AGa8C #> 8 vs 0.307 PC1 pca_truncated_AGa8C #> 9 am 0.235 PC1 pca_truncated_AGa8C #> 10 gear 0.207 PC1 pca_truncated_AGa8C #> # ℹ 12 more rows"},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":null,"dir":"Reference","previous_headings":"","what":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"step_umap() creates specification recipe step project set features smaller space.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"","code":"step_umap( recipe, ..., role = \"predictor\", trained = FALSE, outcome = NULL, neighbors = 15, num_comp = 2, min_dist = 0.01, metric = \"euclidean\", learn_rate = 1, epochs = NULL, initial = \"spectral\", target_weight = 0.5, options = list(verbose = FALSE, n_threads = 1), seed = sample(10^5, 2), prefix = \"UMAP\", keep_original_cols = FALSE, retain = deprecated(), object = NULL, skip = FALSE, id = rand_id(\"umap\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables step. See selections() details. role model terms created step, analysis role assigned? default, new columns created step original variables used predictors model. trained logical indicate quantities preprocessing estimated. outcome call vars specify variable used outcome encoding process (). neighbors integer number nearest neighbors used construct target simplicial set. neighbors greater number data points, smaller value used. num_comp integer number UMAP components. num_comp greater number selected columns minus one, smaller value used. min_dist effective minimum distance embedded points. metric Character, type distance metric use find nearest neighbors. See uwot::umap() details. Default \"euclidean\". learn_rate Positive number learning rate optimization process. epochs Number iterations neighbor optimization. See uwot::umap() details. initial Character, Type initialization coordinates. Can one \"spectral\", \"normlaplacian\", \"random\", \"lvrandom\", \"laplacian\", \"pca\", \"spca\", \"agspectral\", matrix initial coordinates. See uwot::umap() details. Default \"spectral\". target_weight Weighting factor data topology target topology. value 0.0 weights entirely data, value 1.0 weights entirely target. default 0.5 balances weighting equally data target. options list options pass uwot::umap(). arguments X, n_neighbors, n_components, min_dist, n_epochs, ret_model, learning_rate passed . default, verbose n_threads set. seed Two integers control random numbers used numerical methods. default pulls main session's stream numbers give reproducible results seed set prior calling prep() bake(). prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. retain Use keep_original_cols instead specify whether original predictors retained along new embedding variables. object object defines encoding. NULL step trained recipes::prep(). skip logical. step skipped recipe baked bake()? operations baked prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations. id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"updated version recipe new step added sequence existing operations.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"UMAP, short Uniform Manifold Approximation Projection, nonlinear dimension reduction technique finds local, low-dimensional representations data. can run unsupervised supervised different types outcome data (e.g. numeric, factor, etc). argument num_comp controls number components retained (original variables used derive components removed data). new components names begin prefix sequence numbers. variable names padded zeros. example, num_comp < 10, names UMAP1 - UMAP9. num_comp = 101, names UMAP1 - UMAP101.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"tidy() step, tibble retruned columns terms id: terms character, selectors variables selected id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"step 7 tuning parameters: num_comp: # Components (type: integer, default: 2) neighbors: # Nearest Neighbors (type: integer, default: 15) min_dist: Min Distance Points (type: double, default: 0.01) learn_rate: Learning Rate (type: double, default: 1) epochs: # Epochs (type: integer, default: NULL) initial: UMAP Initialization (type: character, default: spectral) target_weight: Proportion Supervised (type: double, default: 0.5)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"saving-prepped-recipe-object","dir":"Reference","previous_headings":"","what":"Saving prepped recipe object","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"recipe step may require native serialization saving use another R session. learn serialization prepped recipes, see bundle package.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"McInnes, L., & Healy, J. (2018). UMAP: Uniform Manifold Approximation Projection Dimension Reduction. https://arxiv.org/abs/1802.03426. \"UMAP Works\" https://umap-learn.readthedocs.io/en/latest/how_umap_works.html","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_umap.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Supervised and unsupervised uniform manifold approximation and projection (UMAP) — step_umap","text":"","code":"if (FALSE) { # rlang::is_installed(\"ggplot2\") && rlang::is_installed(\"irlba\", version = \"2.3.5.2\") library(recipes) library(ggplot2) split <- seq.int(1, 150, by = 9) tr <- iris[-split, ] te <- iris[split, ] set.seed(11) supervised <- recipe(Species ~ ., data = tr) %>% step_center(all_predictors()) %>% step_scale(all_predictors()) %>% step_umap(all_predictors(), outcome = vars(Species), num_comp = 2) %>% prep(training = tr) theme_set(theme_bw()) bake(supervised, new_data = te, Species, starts_with(\"umap\")) %>% ggplot(aes(x = UMAP1, y = UMAP2, col = Species)) + geom_point(alpha = .5) }"},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":null,"dir":"Reference","previous_headings":"","what":"Weight of evidence transformation — step_woe","title":"Weight of evidence transformation — step_woe","text":"step_woe() creates specification recipe step transform nominal data numerical transformation based weights evidence binary outcome.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Weight of evidence transformation — step_woe","text":"","code":"step_woe( recipe, ..., role = \"predictor\", outcome, trained = FALSE, dictionary = NULL, Laplace = 1e-06, prefix = \"woe\", keep_original_cols = FALSE, skip = FALSE, id = rand_id(\"woe\") )"},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Weight of evidence transformation — step_woe","text":"recipe recipe object. step added sequence operations recipe. ... One selector functions choose variables used compute components. See selections() details. tidy method, currently used. role model terms created step, analysis role assigned?. default, function assumes new woe components columns created original variables used predictors model. outcome bare name binary outcome encased vars(). trained logical indicate quantities preprocessing estimated. dictionary tbl. map levels woe values. must layout output returned dictionary(). NULL function build dictionary variables passed .... See dictionary() details. Laplace Laplace smoothing parameter. value usually applied avoid -Inf/Inf predictor category one outcome class. Set 0 allow Inf/-Inf. default 1e-6. Also known 'pseudocount' parameter Laplace smoothing technique. prefix character string prefix resulting new variables. See notes . keep_original_cols logical keep original variables output. Defaults FALSE. skip logical. step skipped recipe baked recipes::bake()? operations baked recipes::prep() run, operations may able conducted new data (e.g. processing outcome variable(s)). Care taken using skip = TRUE may affect computations subsequent operations id character string unique step identify .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Weight of evidence transformation — step_woe","text":"updated version recipe new step added sequence existing steps (). tidy method, tibble woe dictionary used map categories woe values.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Weight of evidence transformation — step_woe","text":"WoE transformation group variables produces new set features. formula $$woe_c = log((P(X = c|Y = 1))/(P(X = c|Y = 0)))$$ \\(c\\) goes 1 \\(C\\) levels given nominal predictor variable \\(X\\). components designed transform nominal variables numerical ones property order magnitude reflects association binary outcome. apply numerical predictors, advisable discretize variables prior running WoE. , variable binarized woe associated later. can achieved using step_discretize(). argument Laplace small quantity added proportions 1's 0's goal avoid log(p/0) log(0/p) results. numerical woe versions names begin woe_ followed respective original name variables. See Good (1985). One can pass custom dictionary tibble step_woe(). must structure output dictionary() (see examples). provided created automatically. role tibble store map levels nominal predictor woe values. may want tweak object goal fix orders levels one given predictor. One easy way tweaking output returned dictionary().","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"tidying","dir":"Reference","previous_headings":"","what":"Tidying","title":"Weight of evidence transformation — step_woe","text":"tidy() step, tibble columns terms (selectors variables selected), value, n_tot, n_bad, n_good, p_bad, p_good, woe outcome returned.. See dictionary() information. tidy() step, tibble retruned columns terms value, n_tot, n_bad, n_good, p_bad, p_good, woe outcome id: terms character, selectors variables selected value character, level outcome n_tot integer, total number n_bad integer, number bad examples n_good integer, number good examples p_bad numeric, p bad examples p_good numeric, p good examples woe numeric, weight evidence outcome character, name outcome variable id character, id step","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"tuning-parameters","dir":"Reference","previous_headings":"","what":"Tuning Parameters","title":"Weight of evidence transformation — step_woe","text":"step 1 tuning parameters: Laplace: Laplace Correction (type: double, default: 1e-06)","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"case-weights","dir":"Reference","previous_headings":"","what":"Case weights","title":"Weight of evidence transformation — step_woe","text":"underlying operation allow case weights.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Weight of evidence transformation — step_woe","text":"Kullback, S. (1959). Information Theory Statistics. Wiley, New York. Hastie, T., Tibshirani, R. Friedman, J. (1986). Elements Statistical Learning, Second Edition, Springer, 2009. Good, . J. (1985), \"Weight evidence: brief survey\", Bayesian Statistics, 2, pp.249-270.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/step_woe.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Weight of evidence transformation — step_woe","text":"","code":"library(modeldata) data(\"credit_data\") set.seed(111) in_training <- sample(1:nrow(credit_data), 2000) credit_tr <- credit_data[in_training, ] credit_te <- credit_data[-in_training, ] rec <- recipe(Status ~ ., data = credit_tr) %>% step_woe(Job, Home, outcome = vars(Status)) woe_models <- prep(rec, training = credit_tr) #> Warning: Some columns used by `step_woe()` have categories with less than 10 values: 'Home', 'Job' # the encoding: bake(woe_models, new_data = credit_te %>% slice(1:5), starts_with(\"woe\")) #> # A tibble: 5 × 2 #> woe_Job woe_Home #> #> 1 -0.451 0.519 #> 2 0.187 -0.512 #> 3 -0.451 -0.512 #> 4 0.187 -0.512 #> 5 1.51 -0.0519 # the original data credit_te %>% slice(1:5) %>% dplyr::select(Job, Home) #> Job Home #> 1 fixed rent #> 2 freelance owner #> 3 fixed owner #> 4 freelance owner #> 5 partime parents # the details: tidy(woe_models, number = 1) #> # A tibble: 12 × 10 #> terms value n_tot n_bad n_good p_bad p_good woe outcome id #> #> 1 Job fixed 1261 273 988 0.451 0.708 -0.451 Status woe_… #> 2 Job freelan… 463 159 304 0.263 0.218 0.187 Status woe_… #> 3 Job others 74 39 35 0.0645 0.0251 0.944 Status woe_… #> 4 Job partime 201 133 68 0.220 0.0487 1.51 Status woe_… #> 5 Job NA 1 1 0 0.00165 0 14.7 Status woe_… #> 6 Home ignore 8 4 4 0.00661 0.00287 0.835 Status woe_… #> 7 Home other 161 78 83 0.129 0.0595 0.773 Status woe_… #> 8 Home owner 931 192 739 0.317 0.530 -0.512 Status woe_… #> 9 Home parents 336 98 238 0.162 0.171 -0.0519 Status woe_… #> 10 Home priv 113 42 71 0.0694 0.0509 0.310 Status woe_… #> 11 Home rent 446 188 258 0.311 0.185 0.519 Status woe_… #> 12 Home NA 5 3 2 0.00496 0.00143 1.24 Status woe_… # Example of custom dictionary + tweaking # custom dictionary woe_dict_custom <- credit_tr %>% dictionary(Job, Home, outcome = \"Status\") woe_dict_custom[4, \"woe\"] <- 1.23 # tweak # passing custom dict to step_woe() rec_custom <- recipe(Status ~ ., data = credit_tr) %>% step_woe( Job, Home, outcome = vars(Status), dictionary = woe_dict_custom ) %>% prep() #> Warning: Some columns used by `step_woe()` have categories with less than 10 values: 'Home', 'Job' rec_custom_baked <- bake(rec_custom, new_data = credit_te) rec_custom_baked %>% dplyr::filter(woe_Job == 1.23) %>% head() #> # A tibble: 6 × 14 #> Seniority Time Age Marital Records Expenses Income Assets Debt #> #> 1 0 48 41 married no 90 80 0 0 #> 2 0 18 21 single yes 35 50 0 0 #> 3 0 36 23 single no 45 122 2500 0 #> 4 14 24 51 married no 75 198 1000 0 #> 5 1 60 26 single no 35 120 0 0 #> 6 1 36 24 married no 76 164 0 0 #> # ℹ 5 more variables: Amount , Price , Status , #> # woe_Job , woe_Home "},{"path":"https://embed.tidymodels.org/dev/reference/tunable_embed.html","id":null,"dir":"Reference","previous_headings":"","what":"tunable methods for embed — tunable.step_discretize_cart","title":"tunable methods for embed — tunable.step_discretize_cart","text":"functions define parameters can tuned specific steps. also define recommended objects dials package can used generate new parameter values characteristics.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/tunable_embed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"tunable methods for embed — tunable.step_discretize_cart","text":"","code":"# S3 method for class 'step_discretize_cart' tunable(x, ...) # S3 method for class 'step_discretize_xgb' tunable(x, ...) # S3 method for class 'step_embed' tunable(x, ...) # S3 method for class 'step_pca_sparse' tunable(x, ...) # S3 method for class 'step_pca_sparse_bayes' tunable(x, ...) # S3 method for class 'step_umap' tunable(x, ...) # S3 method for class 'step_woe' tunable(x, ...)"},{"path":"https://embed.tidymodels.org/dev/reference/tunable_embed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"tunable methods for embed — tunable.step_discretize_cart","text":"x recipe step object ... used.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/tunable_embed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"tunable methods for embed — tunable.step_discretize_cart","text":"tibble object.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":null,"dir":"Reference","previous_headings":"","what":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"Calculates summaries WoE (Weight Evidence) binary outcome given predictor variable. Used biuld dictionary.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"","code":"woe_table(predictor, outcome, Laplace = 1e-06, call = rlang::caller_env(0))"},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"predictor atomic vector, usualy distinct values. outcome dependent variable. atomic vector exactly 2 distinct values. Laplace pseudocount parameter Laplace Smoothing estimator. Default 1e-6. Value avoid -Inf/Inf predictor category one outcome class. Set 0 allow Inf/-Inf. call execution environment currently running function, e.g. caller_env(). function mentioned error messages source error. See call argument rlang::abort() information.","code":""},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"tibble counts, proportions woe. Warning: woe can possibly -Inf. Use 'Laplace' arg avoid .","code":""},{"path":"https://embed.tidymodels.org/dev/reference/woe_table.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Crosstable with woe between a binary outcome and a predictor variable. — woe_table","text":"Kullback, S. (1959). Information Theory Statistics. Wiley, New York. Hastie, T., Tibshirani, R. Friedman, J. (1986). Elements Statistical Learning, Second Edition, Springer, 2009. Good, . J. (1985), \"Weight evidence: brief survey\", Bayesian Statistics, 2, pp.249-270.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-development-version","dir":"Changelog","previous_headings":"","what":"embed (development version)","title":"embed (development version)","text":"step_umap() tunable initial target_weight arguments. #223, #222)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-114","dir":"Changelog","previous_headings":"","what":"embed 1.1.4","title":"embed 1.1.4","text":"CRAN release: 2024-03-20","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"improvements-1-1-4","dir":"Changelog","previous_headings":"","what":"Improvements","title":"embed 1.1.4","text":"step_umap() gained initial target_weight arguments. (#213) Calling ?tidy.step_*() now sends documentation step_*() outcome documented. (#216) Documentation tidy methods steps improved describe return value accurately. (#217) {keras} {tensorflow} moved Suggests instead Imports. (#218)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-113","dir":"Changelog","previous_headings":"","what":"embed 1.1.3","title":"embed 1.1.3","text":"CRAN release: 2023-10-28 step_collapse_stringdist() now return predictors factors. (#204) Fixed regression 1.1.2 step_lencode_glm() couldn’t used multiple columns.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-112","dir":"Changelog","previous_headings":"","what":"embed 1.1.2","title":"embed 1.1.2","text":"CRAN release: 2023-08-17","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"improvements-1-1-2","dir":"Changelog","previous_headings":"","what":"Improvements","title":"embed 1.1.2","text":"keep_original_cols argument added step_woe(). change mean every step produces new columns keep_original_cols argument. (#194) Many internal changes improve consistency slight speed increases.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"breaking-changes-1-1-2","dir":"Changelog","previous_headings":"","what":"Breaking Changes","title":"embed 1.1.2","text":"step_pca_sparse(), step_pca_truncated() step_pca_sparse_bayes() now returns data unaltered num_comp = 0. done consistent recipes steps nature. (#190)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-111","dir":"Changelog","previous_headings":"","what":"embed 1.1.1","title":"embed 1.1.1","text":"CRAN release: 2023-05-30","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"bug-fixes-1-1-1","dir":"Changelog","previous_headings":"","what":"Bug Fixes","title":"embed 1.1.1","text":"Fixed bug step_pca_truncated() didn’t work zero selection. (#181) tidy() methods step_discretize_cart(), step_discretize_xgb(), step_embed(), step_feature_hash(), step_lencode_bayes(), step_lencode_glm(), step_lencode_mixed(), step_pca_sparse(), step_pca_sparse_bayes(), step_pca_truncated(), step_umap(), step_woe() now correctly return zero-row tibbles used empty selections. (#181)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-110","dir":"Changelog","previous_headings":"","what":"embed 1.1.0","title":"embed 1.1.0","text":"CRAN release: 2023-04-14","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"new-steps-1-1-0","dir":"Changelog","previous_headings":"","what":"New Steps","title":"embed 1.1.0","text":"step_pca_truncated() added. step calculates components required, speedup cases used many variables. (#82)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"improvements-1-1-0","dir":"Changelog","previous_headings":"","what":"Improvements","title":"embed 1.1.0","text":"step_collapse_stringdist() gained method options arguments allow different types string distance calculations. (#152) step_umap() gained argument metric. (#154) step_embed() gained keep_original_cols argument. (#176) steps now required_pkgs() methods. Steps tunable arguments now arguments listed documentation. steps add new columns now informatively error name collision occurs.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-100","dir":"Changelog","previous_headings":"","what":"embed 1.0.0","title":"embed 1.0.0","text":"CRAN release: 2022-07-02 step_collapse_cart() can pool predictor’s factor levels using tree-based method. step_collapse_stringdist() can pool predictor’s factor levels using string distances. Case weights support added step_discretize_cart(), step_discretize_xgb(), step_lencode_bayes(), step_lencode_glm(), step_lencode_mixed().","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-020","dir":"Changelog","previous_headings":"","what":"embed 0.2.0","title":"embed 0.2.0","text":"CRAN release: 2022-04-13 step_embed() now correctly defaults random id word “embed”. (#102) step_feature_hash() soft deprecated embed favor step_dummy_hash() textrecipes. (#95) Steps now dedicated subsection detailing happens tidy() applied. (#105) Reorganize documentation recipe step tidy methods (#115). Fixed bug woe_table() step_woe() didn’t respect factor levels outcome. (109)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-015","dir":"Changelog","previous_headings":"","what":"embed 0.1.5","title":"embed 0.1.5","text":"CRAN release: 2021-11-24 Re-licensed package GPL-2 MIT. See consent copyright holders . tunable parameter ranges step_umap() changed neighbors, num_comp, min_dist prevent uwot segmentation faults. step also check see data dimensions consistent argument values. Two new PCA steps added, using sparse techniques estimation: step_pca_sparse() step_pca_sparse_bayes(). Updated use recipes_eval_select() recipes 0.1.17 (#85). Added prefix argument step_umap() harmonize recipes steps (#93). embed recipe steps now officially support empty selections aligned recipes, dplyr packages use tidyselect. step_woe() longer warns high-cardinality predictors recipe estimated. Instead warns categories fewer 10 data points training set. (#74)","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-014","dir":"Changelog","previous_headings":"","what":"embed 0.1.4","title":"embed 0.1.4","text":"CRAN release: 2021-01-16 Minor release changes test cases CRAN get xgboost work Solaris configuration. lme4 rstanarm now Suggests list automatically installed embed. message written console packages missing associated steps functions invoked.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-013","dir":"Changelog","previous_headings":"","what":"embed 0.1.3","title":"embed 0.1.3","text":"CRAN release: 2020-11-12 changes enable better parallel processing windows.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-012","dir":"Changelog","previous_headings":"","what":"embed 0.1.2","title":"embed 0.1.2","text":"CRAN release: 2020-10-17 Changes enable better parallel processing windows.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-011","dir":"Changelog","previous_headings":"","what":"embed 0.1.1","title":"embed 0.1.1","text":"CRAN release: 2020-07-03 Changes tests get archive jail. Updated plumbing behind step_woe(). Due bug tensorflow, added “warm start” instigate TF session one currently exist.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-010","dir":"Changelog","previous_headings":"","what":"embed 0.1.0","title":"embed 0.1.0","text":"CRAN release: 2020-05-25 Changes dplyr 1.0.0","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"new-steps-0-1-0","dir":"Changelog","previous_headings":"","what":"New Steps","title":"embed 0.1.0","text":"step_discretize_xgb() step_discretize_cart() can used convert numeric predictors categorical using supervised binning methods based tree models. Thanks Konrad Semsch contribution. Added step_feature_hash() creating dummy variables using feature hashing.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"breaking-changes-0-1-0","dir":"Changelog","previous_headings":"","what":"Breaking Changes","title":"embed 0.1.0","text":"tidy.step_woe() now column names consistent recipe steps.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"bug-fixes-0-1-0","dir":"Changelog","previous_headings":"","what":"Bug fixes","title":"embed 0.1.0","text":"Fixed bug detecting TF version.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-006","dir":"Changelog","previous_headings":"","what":"embed 0.0.6","title":"embed 0.0.6","text":"CRAN release: 2020-03-17 Small changes base R’s stringsAsFactors change.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-005","dir":"Changelog","previous_headings":"","what":"embed 0.0.5","title":"embed 0.0.5","text":"CRAN release: 2020-01-07 example data now modeldata package. Small TF updates step_embed().","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-004","dir":"Changelog","previous_headings":"","what":"embed 0.0.4","title":"embed 0.0.4","text":"CRAN release: 2019-09-15 Methods added future generic called tunable(). outlines parameters step can/tuned. Small updates work different versions tidyr.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-003","dir":"Changelog","previous_headings":"","what":"embed 0.0.3","title":"embed 0.0.3","text":"CRAN release: 2019-07-12","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"new-steps-0-0-3","dir":"Changelog","previous_headings":"","what":"New Steps","title":"embed 0.0.3","text":"step_umap() added supervised unsupervised encodings. step_woe() created weight evidence encodings.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-002","dir":"Changelog","previous_headings":"","what":"embed 0.0.2","title":"embed 0.0.2","text":"CRAN release: 2018-11-19 mostly maintainence release compatible version 0.1.3 recipes.","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"other-changes-0-0-2","dir":"Changelog","previous_headings":"","what":"Other Changes:","title":"embed 0.0.2","text":"package now depends generics pacakge get broom tidy methods. Karim Lahrichi added ability use callbacks fitting tensorflow models. PR","code":""},{"path":"https://embed.tidymodels.org/dev/news/index.html","id":"embed-001","dir":"Changelog","previous_headings":"","what":"embed 0.0.1","title":"embed 0.0.1","text":"CRAN release: 2018-09-14 First CRAN version","code":""}]