Skip to content

Commit

Permalink
Implement own df_n_col() to avoid calling ncol() (and dim()) (#…
Browse files Browse the repository at this point in the history
…7049)

* Implement own `ncol()` to avoid calling `dim()`

* Introduce `df_n_col()`, block usage of `ncol()`

---------

Co-authored-by: Davis Vaughan <[email protected]>
  • Loading branch information
krlmlr and DavisVaughan authored Aug 5, 2024
1 parent 709bd4e commit bffdfb1
Show file tree
Hide file tree
Showing 15 changed files with 54 additions and 26 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# dplyr (development version)

* Fixed an issue where duckplyr's ALTREP data frames were being materialized
early due to internal usage of `ncol()` (#7049).

* R >=3.6.0 is now explicitly required (#7026).

# dplyr 1.1.4
Expand Down
2 changes: 1 addition & 1 deletion R/all-equal.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ equal_data_frame <- function(x, y, ignore_col_order = TRUE, ignore_row_order = T
return("Different number of rows.")
}

if (ncol(x) == 0L) {
if (df_n_col(x) == 0L) {
return(TRUE)
}

Expand Down
2 changes: 1 addition & 1 deletion R/arrange.R
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ sort_key_generator <- function(locale) {
# ------------------------------------------------------------------------------

dplyr_order_legacy <- function(data, direction = "asc") {
if (ncol(data) == 0L) {
if (df_n_col(data) == 0L) {
# Work around `order(!!!list())` returning `NULL`
return(seq_len(nrow(data)))
}
Expand Down
2 changes: 1 addition & 1 deletion R/data-storms.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' <https://github.com/tidyverse/dplyr/blob/main/data-raw/storms.R>
#'
#' @format A tibble with `r format(nrow(storms), big.mark = ",")` observations
#' and `r ncol(storms)` variables:
#' and `r df_n_col(storms)` variables:
#' \describe{
#' \item{name}{Storm Name}
#' \item{year,month,day}{Date of report}
Expand Down
2 changes: 1 addition & 1 deletion R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ dplyr_reconstruct.rowwise_df <- function(data, template) {
}

dplyr_col_select <- function(.data, loc, error_call = caller_env()) {
loc <- vec_as_location(loc, n = ncol(.data), names = names(.data))
loc <- vec_as_location(loc, n = df_n_col(.data), names = names(.data))

out <- .data[loc]
if (!inherits(out, "data.frame")) {
Expand Down
25 changes: 25 additions & 0 deletions R/n-col.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Masks `ncol()` to avoid accidentally materializing ALTREP duckplyr
# data frames.
ncol <- function(x) {
abort("Use `df_n_col()` or `mat_n_col()` instead.")
}

# Alternative to `ncol()` which avoids `dim()`.
#
# `dim()` also requires knowing the number of rows,
# which forces ALTREP duckplyr data frames to materialize.
#
# This function makes the same assertion as vctrs about data frame structure,
# i.e. if `x` inherits from `"data.frame"`, then it is a VECSXP with length
# equal to the number of columns.
df_n_col <- function(x) {
x <- unclass(x)
obj_check_list(x)
length(x)
}

# In a few places we call `ncol()` on matrices, and in those
# cases we want to continue using the base version.
mat_n_col <- function(x) {
base::ncol(x)
}
2 changes: 1 addition & 1 deletion R/rows.R
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ rows_check_by <- function(by, y, ..., error_call = caller_env()) {
check_dots_empty()

if (is.null(by)) {
if (ncol(y) == 0L) {
if (df_n_col(y) == 0L) {
abort("`y` must have at least one column.", call = error_call)
}

Expand Down
6 changes: 3 additions & 3 deletions R/sets.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,10 @@ is_compatible <- function(x, y, ignore_col_order = TRUE, convert = TRUE) {
return("`y` must be a data frame.")
}

nc <- ncol(x)
if (nc != ncol(y)) {
nc <- df_n_col(x)
if (nc != df_n_col(y)) {
return(
c(x = glue("Different number of columns: {nc} vs {ncol(y)}."))
c(x = glue("Different number of columns: {nc} vs {df_n_col(y)}."))
)
}

Expand Down
2 changes: 1 addition & 1 deletion R/slice.R
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ slice_eval <- function(mask,

slice_idx <- ...elt(i)

if (is.matrix(slice_idx) && ncol(slice_idx) == 1) {
if (is.matrix(slice_idx) && mat_n_col(slice_idx) == 1) {
lifecycle::deprecate_warn(
when = "1.1.0",
what = I("Slicing with a 1-column matrix"),
Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/test-across.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,15 @@ test_that("across() result locations are aligned with column names (#4967)", {
test_that("across() works sequentially (#4907)", {
df <- tibble(a = 1)
expect_equal(
mutate(df, x = ncol(across(where(is.numeric), identity)), y = ncol(across(where(is.numeric), identity))),
mutate(df, x = df_n_col(across(where(is.numeric), identity)), y = df_n_col(across(where(is.numeric), identity))),
tibble(a = 1, x = 1L, y = 2L)
)
expect_equal(
mutate(df, a = "x", y = ncol(across(where(is.numeric), identity))),
mutate(df, a = "x", y = df_n_col(across(where(is.numeric), identity))),
tibble(a = "x", y = 0L)
)
expect_equal(
mutate(df, x = 1, y = ncol(across(where(is.numeric), identity))),
mutate(df, x = 1, y = df_n_col(across(where(is.numeric), identity))),
tibble(a = 1, x = 1, y = 2L)
)
})
Expand Down Expand Up @@ -282,15 +282,15 @@ test_that("across() gives meaningful messages", {
test_that("monitoring cache - across() can be used twice in the same expression", {
df <- tibble(a = 1, b = 2)
expect_equal(
mutate(df, x = ncol(across(where(is.numeric), identity)) + ncol(across(a, identity))),
mutate(df, x = df_n_col(across(where(is.numeric), identity)) + df_n_col(across(a, identity))),
tibble(a = 1, b = 2, x = 3)
)
})

test_that("monitoring cache - across() can be used in separate expressions", {
df <- tibble(a = 1, b = 2)
expect_equal(
mutate(df, x = ncol(across(where(is.numeric), identity)), y = ncol(across(a, identity))),
mutate(df, x = df_n_col(across(where(is.numeric), identity)), y = df_n_col(across(a, identity))),
tibble(a = 1, b = 2, x = 2, y = 1)
)
})
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-colwise-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,21 +181,21 @@ test_that("summarise_at with multiple columns AND unnamed functions works (#4119
res <- storms %>%
summarise_at(vars(wind, pressure), list(mean, median))

expect_equal(ncol(res), 4L)
expect_equal(df_n_col(res), 4L)
expect_equal(names(res), c("wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2"))

res <- storms %>%
summarise_at(vars(wind, pressure), list(n = length, mean, median))

expect_equal(ncol(res), 6L)
expect_equal(df_n_col(res), 6L)
expect_equal(names(res), c("wind_n", "pressure_n", "wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2"))
})

test_that("mutate_at with multiple columns AND unnamed functions works (#4119)", {
res <- storms %>%
mutate_at(vars(wind, pressure), list(mean, median))

expect_equal(ncol(res), ncol(storms) + 4L)
expect_equal(df_n_col(res), df_n_col(storms) + 4L)
expect_equal(
names(res),
c(names(storms), c("wind_fn1", "pressure_fn1", "wind_fn2", "pressure_fn2"))
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-deprec-context.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ test_that("cur_data() and cur_data_all() work sequentially", {

df <- tibble(a = 1)
expect_equal(
mutate(df, x = ncol(cur_data()), y = ncol(cur_data())),
mutate(df, x = df_n_col(cur_data()), y = df_n_col(cur_data())),
tibble(a = 1, x = 1, y = 2)
)

gf <- tibble(a = 1, b = 2) %>% group_by(a)
expect_equal(
mutate(gf, x = ncol(cur_data_all()), y = ncol(cur_data_all())),
mutate(gf, x = df_n_col(cur_data_all()), y = df_n_col(cur_data_all())),
group_by(tibble(a = 1, b = 2, x = 2, y = 3), a)
)
})
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-deprec-do.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ test_that("unnamed results bound together by row", {
})

test_that("named argument become list columns", {
out <- df %>% do(nrow = nrow(.), ncol = ncol(.))
out <- df %>% do(nrow = nrow(.), ncol = df_n_col(.))
expect_equal(out$nrow, list(1, 2, 3))
# includes grouping columns
expect_equal(out$ncol, list(3, 3, 3))
Expand Down Expand Up @@ -121,7 +121,7 @@ test_that("empty data frames give consistent outputs", {
vapply(pillar::type_sum, character(1)) %>%
expect_equal(c(x = "dbl", g = "chr", y = "int"))
dat %>%
do(y = ncol(.)) %>%
do(y = df_n_col(.)) %>%
vapply(pillar::type_sum, character(1)) %>%
expect_equal(c(y = "list"))

Expand All @@ -144,7 +144,7 @@ test_that("empty data frames give consistent outputs", {
vapply(pillar::type_sum, character(1)) %>%
expect_equal(c(x = "dbl", g = "chr", y = "int"))
grp %>%
do(y = ncol(.)) %>%
do(y = df_n_col(.)) %>%
vapply(pillar::type_sum, character(1)) %>%
expect_equal(c(g = "chr", y = "list"))

Expand All @@ -166,7 +166,7 @@ test_that("empty data frames give consistent outputs", {
vapply(pillar::type_sum, character(1)) %>%
expect_equal(c(x = "dbl", g = "chr", y = "int"))
emt %>%
do(y = ncol(.)) %>%
do(y = df_n_col(.)) %>%
vapply(pillar::type_sum, character(1)) %>%
expect_equal(c(g = "chr", y = "list"))
})
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-distinct.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ test_that("distinct for single column works as expected (#1937)", {
test_that("distinct works for 0-sized columns (#1437)", {
df <- tibble(x = 1:10) %>% select(-x)
ddf <- distinct(df)
expect_equal(ncol(ddf), 0L)
expect_equal(df_n_col(ddf), 0L)
})

test_that("if no variables specified, uses all", {
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ test_that("select doesn't fail if some names missing", {

test_that("select with no args returns nothing", {
empty <- select(mtcars)
expect_equal(ncol(empty), 0)
expect_equal(df_n_col(empty), 0)
expect_equal(nrow(empty), 32)

empty <- select(mtcars, !!!list())
expect_equal(ncol(empty), 0)
expect_equal(df_n_col(empty), 0)
expect_equal(nrow(empty), 32)
})

Expand Down

0 comments on commit bffdfb1

Please sign in to comment.