From 86078768231625d798de4d48615ea926369952a3 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Mon, 30 Aug 2021 10:09:14 -0400 Subject: [PATCH 1/3] first draft --- R/step-subset-expand.R | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/R/step-subset-expand.R b/R/step-subset-expand.R index fefa753fa..1795279e2 100644 --- a/R/step-subset-expand.R +++ b/R/step-subset-expand.R @@ -19,12 +19,29 @@ #' weights = rnorm(6, as.numeric(size) + 2) #' )) #' -#' # All possible combinations --------------------------------------- -#' # Note that all defined, but not necessarily present, levels of the -#' # factor variable `size` are retained. -#' fruits %>% expand(type) -#' fruits %>% expand(type, size) -#' fruits %>% expand(type, size, year) +#' # Factors ---------------------------------------------------------- +#' # The output of `expand()` only contains levels of factor +#' # variables present in the data. This is in contrast to the output of +#' # `expand()` when the input is a data.frame, which retains all factor levels. +#' # Using the `fruits` data, the level "L" is not present in `expand()` +#' # output, unlike the output of `expand()` after converting `fruits` to a +#' # tibble. 
+#' #' fruits %>% expand(size) %>% as_tibble() #' #> # A tibble: 3 × 1 #' #> size #' #> <fct> #' #> 1 XS #' #> 2 S #' #> 3 M #' fruits %>% as_tibble() %>% expand(size) #' #> # A tibble: 4 × 1 #' #> size #' #> <fct> #' #> 1 XS #' #> 2 S #' #> 3 M #' #> 4 L #' #' # Other uses ------------------------------------------------------- #' fruits %>% expand(type, size, 2010:2012) From ca6f19ac400c02235ab9fa6818d3085fa69c3f16 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Mon, 30 Aug 2021 11:40:58 -0400 Subject: [PATCH 2/3] document factor behavior --- R/step-subset-expand.R | 15 +++++++-------- man/expand.dtplyr_step.Rd | 29 ++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/R/step-subset-expand.R b/R/step-subset-expand.R index 1795279e2..32ee2d7e3 100644 --- a/R/step-subset-expand.R +++ b/R/step-subset-expand.R @@ -2,7 +2,8 @@ #' #' @description #' This is a method for the tidyr `expand()` generic. It is translated to -#' [data.table::CJ()]. +#' [data.table::CJ()]. Unlike the data.frame method, this method only retains +#' factor levels present in the data. #' #' @param data A [lazy_dt()]. #' @inheritParams tidyr::expand @@ -20,13 +21,10 @@ #' )) #' #' # Factors ---------------------------------------------------------- -#' # The output of `expand()` only contains levels of factor -#' # variables present in the data. This is in contrast to the output of -#' # `expand()` when the input is a data.frame, which retains all factor levels. -#' # Using the `fruits` data, the level "L" is not present in `expand()` -#' # output, unlike the output of `expand()` after converting `fruits` to a -#' # tibble. -#' +#' # When called on `fruits` defined above, the level "L" is not present in +#' # the output of `expand(fruits, size)`, unlike the output of `expand()` when +#' # `fruits` is a data.frame. 
+#' #' fruits %>% expand(size) %>% as_tibble() #' #> # A tibble: 3 × 1 #' #> size @@ -54,6 +52,7 @@ #' # Use with `right_join()` to fill in missing rows #' fruits %>% dplyr::right_join(all) # exported onLoad + expand.dtplyr_step <- function(data, ..., .name_repair = "check_unique") { dots <- capture_dots(data, ..., .j = FALSE) dots <- dots[!vapply(dots, is_null, logical(1))] diff --git a/man/expand.dtplyr_step.Rd b/man/expand.dtplyr_step.Rd index 31f4698c7..22e678405 100644 --- a/man/expand.dtplyr_step.Rd +++ b/man/expand.dtplyr_step.Rd @@ -49,7 +49,8 @@ to enforce them.} } \description{ This is a method for the tidyr \code{expand()} generic. It is translated to -\code{\link[data.table:J]{data.table::CJ()}}. +\code{\link[data.table:J]{data.table::CJ()}}. Unlike the data.frame method, this method only retains +factor levels present in the data. } \examples{ library(tidyr) @@ -64,12 +65,26 @@ fruits <- lazy_dt(tibble( weights = rnorm(6, as.numeric(size) + 2) )) -# All possible combinations --------------------------------------- -# Note that all defined, but not necessarily present, levels of the -# factor variable `size` are retained. -fruits \%>\% expand(type) -fruits \%>\% expand(type, size) -fruits \%>\% expand(type, size, year) +# Factors ---------------------------------------------------------- +# When called on `fruits` defined above, the level "L" is not present in +# the output of `expand(fruits, size)`, unlike the output of `expand()` when +# `fruits` is a data.frame. 
+ +fruits \%>\% expand(size) \%>\% as_tibble() #> # A tibble: 3 × 1 #> size #> <fct> #> 1 XS #> 2 S #> 3 M fruits \%>\% as_tibble() \%>\% expand(size) #> # A tibble: 4 × 1 #> size #> <fct> #> 1 XS #> 2 S #> 3 M #> 4 L # Other uses ------------------------------------------------------- fruits \%>\% expand(type, size, 2010:2012) From e91110c2f99b2b69d712ca34df9f7d91935f2a52 Mon Sep 17 00:00:00 2001 From: eutwt <11261404+eutwt@users.noreply.github.com> Date: Mon, 30 Aug 2021 11:51:58 -0400 Subject: [PATCH 3/3] formatting --- R/step-subset-expand.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/step-subset-expand.R b/R/step-subset-expand.R index 32ee2d7e3..cce3fd7be 100644 --- a/R/step-subset-expand.R +++ b/R/step-subset-expand.R @@ -52,7 +52,6 @@ #' # Use with `right_join()` to fill in missing rows #' fruits %>% dplyr::right_join(all) # exported onLoad - expand.dtplyr_step <- function(data, ..., .name_repair = "check_unique") { dots <- capture_dots(data, ..., .j = FALSE) dots <- dots[!vapply(dots, is_null, logical(1))]