From 6f9ed5304722026331bdbbfb412d50b6023beb21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <kirill@cynkra.com>
Date: Mon, 11 Nov 2024 06:32:40 +0100
Subject: [PATCH 1/3] SNAPSHOT

---
 R/as_duckplyr_tibble.R |  1 +
 R/compute.R            | 37 ++++++++++++++++-
 R/handle_desc.R        | 42 ++++++++++++++-----
 R/mutate.R             | 14 +++++--
 R/relational-expr.R    |  1 +
 R/translate.R          | 93 +++++++++++++++++++++++++-----------------
 R/unique_table_name.R  |  6 +++
 7 files changed, 141 insertions(+), 53 deletions(-)
 create mode 100644 R/unique_table_name.R

diff --git a/R/as_duckplyr_tibble.R b/R/as_duckplyr_tibble.R
index dcb306469..e91c313f4 100644
--- a/R/as_duckplyr_tibble.R
+++ b/R/as_duckplyr_tibble.R
@@ -1,6 +1,7 @@
 #' as_duckplyr_tibble
 #'
 #' `as_duckplyr_tibble()` converts the input to a tibble and then to a duckplyr data frame.
+#' This function also accepts \pkg{dbplyr} lazy tables.
 #'
 #' @return For `as_duckplyr_tibble()`, an object of class
 #'   `c("duckplyr_df", class(tibble()))` .
diff --git a/R/compute.R b/R/compute.R
index 31287154f..df148d6fd 100644
--- a/R/compute.R
+++ b/R/compute.R
@@ -1,10 +1,43 @@
 # Generated by 02-duckplyr_df-methods.R
 #' @export
-compute.duckplyr_df <- function(x, ...) {
+compute.duckplyr_df <- function(x, ..., name = NULL, temporary = TRUE) {
+  if (is.null(name)) {
+    if (!isTRUE(temporary)) {
+      cli::cli_abort("{.arg name} must be provided if {.arg temporary} is {.val {FALSE}}.")
+    }
+    name <- unique_table_name()
+  }
+  # `name` is never NULL from here on: it was either supplied or generated above.
+  # Split it into schema and table by counting the components in the Id's `name` slot.
+  con <- get_default_duckdb_connection()
+  quoted <- DBI::dbQuoteIdentifier(con, name)
+  unquoted <- DBI::dbUnquoteIdentifier(con, quoted)[[1]]
+  if (length(unquoted@name) == 1) {
+    schema <- ""
+    table <- unquoted@name[[1]]
+  } else if (length(unquoted@name) == 2) {
+    schema <- unquoted@name[[1]]
+    table <- unquoted@name[[2]]
+  } else {
+    cli::cli_abort('{.arg name} must be either a string or of the form {.code SQL("schema.table")}')
+  }
+
   # Our implementation
   rel_try(NULL,
-    "No relational implementation for compute()" = TRUE,
     {
+      browser()
+      sql <- paste0(
+        "CREATE ",
+        if (isTRUE(temporary)) "TEMPORARY ",
+        "TABLE ",
+        quoted,
+        " AS FROM _"
+      )
+      rel <- duckdb_rel_from_df(x)
+      duckdb$rel_sql(rel, sql)
+      out_rel <- duckdb$rel_from_table(con, table_name = table, schema_name = schema)
+      out <- rel_to_df(out_rel)
+      out <- dplyr_reconstruct(out, x)
       return(out)
     }
   )
diff --git a/R/handle_desc.R b/R/handle_desc.R
index fc5e15a97..1078b763c 100644
--- a/R/handle_desc.R
+++ b/R/handle_desc.R
@@ -1,23 +1,43 @@
 # Used in arrange()
+# Handles calls to 'desc' function by
+# - extracting the sort order
+# - removing any desc-function calls from the expressions: desc(colname) -> colname
 handle_desc <- function(dots) {
-  # Handles calls to 'desc' function by
-  # - extracting the sort order
-  # - removing any desc-function calls from the expressions: desc(colname) -> colname
   ascending <- rep(TRUE, length(dots))
 
   for (i in seq_along(dots)) {
     expr <- quo_get_expr(dots[[i]])
+    env <- quo_get_env(dots[[i]])
 
-    if (!is.call(expr)) next
-    if (expr[[1]] != "desc") next
+    if (is_desc(expr, env)) {
+      ascending[[i]] <- FALSE
+      dots[[i]] <- new_quosure(expr[[2]], env = env)
+    }
+  }
 
-    # Check that desc is called with a single argument
-    # (dplyr::desc() accepts only one argument)
-    if (length(expr) > 2) cli::cli_abort("`desc()` must be called with exactly one argument.")
+  list(dots = dots, ascending = ascending)
+}
 
-    ascending[i] <- FALSE
-    dots[[i]] <- new_quosure(expr[[2]], env = quo_get_env(dots[[i]]))
+is_desc <- function(expr, env) {
+  if (!is.call(expr)) {
+    return(FALSE)
   }
 
-  list(dots = dots, ascending = ascending)
+  # The callee is a symbol for `desc(x)` and a `::` call for `dplyr::desc(x)`.
+  fun <- expr[[1]]
+  if (is.symbol(fun)) {
+    # A bare `desc` only counts if it resolves to dplyr's function in `env`.
+    if (fun != "desc" || !identical(eval(fun, env), dplyr::desc)) {
+      return(FALSE)
+    }
+  } else if (!is.call(fun) || !identical(fun[[1]], quote(`::`)) ||
+    !(as.character(fun[[2]]) %in% c("dplyr", "duckplyr")) || fun[[3]] != "desc") {
+    return(FALSE)
+  }
+
+  if (length(expr) != 2) {
+    cli::cli_abort("{.fun desc} must be called with exactly one argument.")
+  }
+
+  TRUE
 }
diff --git a/R/mutate.R b/R/mutate.R
index 3d6da1141..1f271cb19 100644
--- a/R/mutate.R
+++ b/R/mutate.R
@@ -12,7 +12,9 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used",
     {
       rel <- duckdb_rel_from_df(.data)
 
-      if (length(by_names) > 0) {
+      need_oo <- (length(by_names) > 0)
+
+      if (need_oo) {
         rel <- oo_prep(rel)
       }
 
@@ -55,8 +57,14 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used",
 
           names_new <- c(names_new, new)
 
-          new_pos <- match(new, names(current_data), nomatch = length(current_data) + j)
           new_expr <- rel_translate(quo, names_data = names(current_data), alias = new, partition = by_names, need_window = TRUE)
+          if (!need_oo && isTRUE(attr(new_expr, "reorder"))) {
+            rel <- oo_prep(rel)
+            need_oo <- TRUE
+            names_out <- rel_names(rel)
+          }
+
+          new_pos <- match(new, names(current_data), nomatch = length(current_data) + j)
           exprs[[new_pos]] <- new_expr
 
           new_names_used <- intersect(attr(new_expr, "used"), names(.data))
@@ -67,7 +75,7 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used",
         current_data <- rel_to_df(rel)
       }
 
-      if (length(by_names) > 0) {
+      if (need_oo) {
         rel <- oo_restore(rel)
       }
 
diff --git a/R/relational-expr.R b/R/relational-expr.R
index a0c8065e6..43d682ae6 100644
--- a/R/relational-expr.R
+++ b/R/relational-expr.R
@@ -101,6 +101,7 @@ relexpr_window <- function(
   stopifnot(is.null(offset_expr) || inherits(offset_expr, "relational_relexpr"))
   stopifnot(is.null(default_expr) || inherits(default_expr, "relational_relexpr"))
   stopifnot(is.null(alias) || is_string(alias))
+
   new_relexpr(
     list(
       expr = expr,
diff --git a/R/translate.R b/R/translate.R
index 05edc3675..7c7f99e69 100644
--- a/R/translate.R
+++ b/R/translate.R
@@ -221,27 +221,9 @@ rel_translate_lang <- function(
     }
   )
 
-  aliases <- c(
-    sd = "stddev",
-    first = "first_value",
-    last = "last_value",
-    nth = "nth_value",
-    "/" = "___divide",
-    "log10" = "___log10",
-    "log" = "___log",
-    "as.integer" = "r_base::as.integer",
-    "<" = "r_base::<",
-    "<=" = "r_base::<=",
-    ">" = "r_base::>",
-    ">=" = "r_base::>=",
-    "==" = "r_base::==",
-    "!=" = "r_base::!=",
-    NULL
-  )
-
   known_window <- c(
     # Window functions
-    "rank", "dense_rank", "percent_rank",
+    "min_rank", "dense_rank", "percent_rank",
     "row_number", "first", "last", "nth",
     "cume_dist", "lead", "lag", "ntile",
 
@@ -253,14 +235,6 @@ rel_translate_lang <- function(
 
   window <- need_window && (name %in% known_window)
 
-  if (name %in% names(aliases)) {
-    name <- aliases[[name]]
-    if (grepl("^r_base::", name)) {
-      meta_ext_register()
-    }
-  }
-  # name <- aliases[name] %|% name
-
   order_bys <- list()
   offset_expr <- NULL
   default_expr <- NULL
@@ -280,6 +254,15 @@ rel_translate_lang <- function(
       order_bys <- list(do_translate(expr$order_by, in_window = TRUE))
       expr$order_by <- NULL
     }
+  } else if (name %in% c("row_number", "min_rank", "dense_rank")) {
+    if (name == "row_number" && length(expr) == 1) {
+      # Fallthrough
+    } else if (length(expr) == 2 && is.name(expr[[2]])) {
+      order_bys <- list(do_translate(expr[[2]], in_window = TRUE))
+      expr <- list(expr[[1]])
+    } else {
+      cli::cli_abort("{.fun {name}} can only be translated if it uses column names as arguments")
+    }
   }
 
   args <- map(as.list(expr[-1]), do_translate, in_window = in_window || window)
@@ -290,6 +273,33 @@ rel_translate_lang <- function(
     }
   }
 
+  # Aliasing comes last:
+  aliases <- c(
+    sd = "stddev",
+    first = "first_value",
+    last = "last_value",
+    nth = "nth_value",
+    min_rank = "rank",
+    "/" = "___divide",
+    log10 = "___log10",
+    log = "___log",
+    as.integer = "r_base::as.integer",
+    "<" = "r_base::<",
+    "<=" = "r_base::<=",
+    ">" = "r_base::>",
+    ">=" = "r_base::>=",
+    "==" = "r_base::==",
+    "!=" = "r_base::!=",
+    NULL
+  )
+
+  if (name %in% names(aliases)) {
+    name <- aliases[[name]]
+    if (grepl("^r_base::", name)) {
+      meta_ext_register()
+    }
+  }
+
   fun <- relexpr_function(name, args)
   if (window) {
     partitions <- map(partition, relexpr_reference)
@@ -325,6 +335,7 @@ rel_translate <- function(
   }
 
   used <- character()
+  reorder <- FALSE
 
   do_translate <- function(expr, in_window = FALSE, top_level = FALSE) {
     stopifnot(!is_quosure(expr))
@@ -351,15 +362,23 @@ rel_translate <- function(
         }
       },
       #
-      language = rel_translate_lang(
-        expr,
-        do_translate,
-        names_data,
-        env,
-        partition,
-        in_window,
-        need_window
-      ),
+      language = {
+        lang <- rel_translate_lang(
+          expr,
+          do_translate,
+          names_data,
+          env,
+          partition,
+          in_window,
+          need_window
+        )
+
+        if (inherits(lang, "relational_relexpr_window") && length(lang$order_bys) > 0) {
+          used <<- unique(c(used, map_chr(lang$order_bys, ~ .x$name)))
+          reorder <<- TRUE
+        }
+        lang
+      },
       #
       cli::cli_abort("Internal: Unknown type {.val {typeof(expr)}}")
     )
@@ -371,5 +390,5 @@ rel_translate <- function(
     out <- relexpr_set_alias(out, alias)
   }
 
-  structure(out, used = used)
+  structure(out, used = used, reorder = reorder)
 }
diff --git a/R/unique_table_name.R b/R/unique_table_name.R
new file mode 100644
index 000000000..42130925a
--- /dev/null
+++ b/R/unique_table_name.R
@@ -0,0 +1,6 @@
+# From dbplyr
+# Returns `prefix`, "duckplyr_" and 10 randomly drawn alphanumeric characters, concatenated.
+unique_table_name <- function(prefix = "") {
+  chars <- sample(c(letters, LETTERS, 0:9), 10, replace = TRUE)
+  paste0(prefix, "duckplyr_", paste0(chars, collapse = ""))
+}

From 30c1440450f9e78db2dc4e0a6205f8912706808e Mon Sep 17 00:00:00 2001
From: krlmlr <krlmlr@users.noreply.github.com>
Date: Fri, 22 Nov 2024 01:57:36 +0000
Subject: [PATCH 2/3] chore: Auto-update from GitHub Actions

Run: https://github.com/tidyverse/duckplyr/actions/runs/11964996619
---
 man/as_duckplyr_df.Rd | 1 +
 1 file changed, 1 insertion(+)

diff --git a/man/as_duckplyr_df.Rd b/man/as_duckplyr_df.Rd
index d7458df27..bae85bc5c 100644
--- a/man/as_duckplyr_df.Rd
+++ b/man/as_duckplyr_df.Rd
@@ -30,6 +30,7 @@ and will fail for any other classes, including subclasses of \code{"data.frame"}
 This behavior is likely to change, do not rely on it.
 
 \code{as_duckplyr_tibble()} converts the input to a tibble and then to a duckplyr data frame.
+This function also accepts \pkg{dbplyr} lazy tables.
 }
 \details{
 Set the \code{DUCKPLYR_FALLBACK_INFO} and \code{DUCKPLYR_FORCE} environment variables

From fd2c8b2e598e056dbdcbee9244371b21088f62f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <kirill@cynkra.com>
Date: Thu, 28 Nov 2024 11:47:09 +0100
Subject: [PATCH 3/3] TO BE SORTED OUT

---
 R/compute.R          |   1 -
 R/meta.R             | 281 -------------------------------------------
 man/config.Rd        |   9 +-
 man/df_from_file.Rd  |  29 ++++-
 man/read_duckplyr.Rd |  47 ++++++++
 5 files changed, 79 insertions(+), 288 deletions(-)
 create mode 100644 man/read_duckplyr.Rd

diff --git a/R/compute.R b/R/compute.R
index df148d6fd..c3be5fc29 100644
--- a/R/compute.R
+++ b/R/compute.R
@@ -25,7 +25,6 @@ compute.duckplyr_df <- function(x, ..., name = NULL, temporary = TRUE) {
   # Our implementation
   rel_try(NULL,
     {
-      browser()
       sql <- paste0(
         "CREATE ",
         if (isTRUE(temporary)) "TEMPORARY ",
diff --git a/R/meta.R b/R/meta.R
index d164dc408..e69de29bb 100644
--- a/R/meta.R
+++ b/R/meta.R
@@ -1,281 +0,0 @@
-call_stack <- collections::stack()
-pre_code_cache <- collections::queue()
-code_cache <- collections::queue()
-ext_cache <- collections::dict()
-macro_cache <- collections::dict()
-df_cache <- collections::dict()
-rel_cache <- collections::dict()
-
-meta_call <- function(name) {
-  meta_call_start(name)
-  withr::defer_parent(meta_call_end())
-}
-
-meta_call_start <- function(name) {
-  call_stack$push(name)
-}
-
-meta_call_end <- function() {
-  call_stack$pop()
-}
-
-meta_call_current <- function() {
-  if (call_stack$size() == 0) {
-    return(NULL)
-  }
-  call_stack$peek()
-}
-
-meta_clear <- function() {
-  pre_code_cache$clear()
-  code_cache$clear()
-  ext_cache$clear()
-  macro_cache$clear()
-  df_cache$clear()
-  rel_cache$clear()
-}
-
-meta_pre_record <- function(call) {
-  pre_code_cache$push(new_prom_fun({{ call }}))
-  invisible()
-}
-
-meta_record <- function(call) {
-  code_cache$push(new_prom_fun({{ call }}))
-  invisible()
-}
-
-meta_replay <- function(add_pre_code = TRUE) {
-  if (add_pre_code) {
-    con_exprs <- list(
-      expr(duckdb <- asNamespace("duckdb")),
-      expr(drv <- duckdb::duckdb()),
-      expr(con <- DBI::dbConnect(drv)),
-      expr(experimental <- !!(Sys.getenv("DUCKPLYR_EXPERIMENTAL") == "TRUE"))
-    )
-    con_code <- map(con_exprs, constructive::deparse_call)
-    pre_code <- c(
-      con_code,
-      map(pre_code_cache$as_list(), ~ .x())
-    )
-  } else {
-    pre_code <- NULL
-  }
-
-  # HACK
-  count <- rel_cache$size()
-  res_name <- sym(paste0("rel", count))
-  res_mat_expr <- expr(duckdb$rel_to_altrep(!!res_name))
-  res_code <- map(list(res_name, res_mat_expr), constructive::deparse_call)
-
-  all_code <- c(
-    pre_code,
-    map(code_cache$as_list(), ~ .x()),
-    res_code
-  )
-
-  walk(all_code, print)
-}
-
-meta_replay_to_fun_code <- function() {
-  code <- utils::capture.output(meta_replay(add_pre_code = FALSE))
-  code <- c(
-    paste0("function(con, experimental) {"),
-    paste0("  ", code),
-    "}"
-  )
-
-  # Trailing newline
-  paste0(code, "\n", collapse = "")
-}
-
-meta_replay_to_fun <- function(text = meta_replay_to_fun_code()) {
-  eval(parse(text = text)[[1]])
-}
-
-meta_replay_to_fun_file <- function(name) {
-  code <- paste0(
-    "# Generated by meta_replay_to_fun_file(), do not edit by hand\n", name, " <- ",
-    meta_replay_to_fun_code()
-  )
-
-  path <- file.path("R", paste0(name, ".R"))
-
-  brio::write_file(code, path)
-}
-
-meta_replay_to_file <- function(path, extra = character()) {
-  code <- utils::capture.output(meta_replay())
-  writeLines(c(extra, code), path)
-}
-
-meta_replay_to_new_doc <- function() {
-  code <- utils::capture.output(meta_replay())
-  rstudioapi::documentNew(code, execute = TRUE)
-}
-
-meta_replay_to_reprex <- function(...) {
-  code <- utils::capture.output(meta_replay())
-  reprex::reprex(input = code, ...)
-}
-
-meta_eval <- function() {
-  code <- utils::capture.output(meta_replay())
-  eval(parse(text = code))
-}
-
-meta_ext_register <- function(name = "rfuns") {
-  if (ext_cache$has(name)) {
-    return(invisible())
-  }
-
-  stopifnot(identical(name, "rfuns"))
-
-  ext_install_expr <- expr(invisible(
-    duckdb$rapi_load_rfuns(drv@database_ref)
-  ))
-  meta_pre_record(constructive::deparse_call(ext_install_expr))
-
-  ext_cache$set(name, TRUE)
-  invisible()
-}
-
-meta_macro_register <- function(name) {
-  macro <- duckplyr_macros[name]
-  if (is.na(macro)) {
-    return(invisible())
-  }
-
-  if (macro_cache$has(name)) {
-    return(invisible())
-  }
-
-  # Register functions from the rfuns extension
-  # Can't use '^"r_' because of the way the macro is defined
-  if (grepl('"r_', macro)) {
-    meta_ext_register()
-  }
-
-  macro_expr <- expr(invisible(
-    DBI::dbExecute(con, !!paste0('CREATE MACRO "', names(macro), '"', macro))
-  ))
-  meta_pre_record(constructive::deparse_call(macro_expr))
-
-  macro_cache$set(name, TRUE)
-  invisible()
-}
-
-meta_df_register <- function(df) {
-  if (df_cache$has(df)) {
-    return(invisible(df_cache$get(df)))
-  }
-
-  count <- df_cache$size()
-  name <- sym(paste0("df", count + 1))
-
-  df_expr <- NULL
-
-  if (Sys.getenv("DUCKPLYR_META_GLOBAL") == "TRUE") {
-    global_dfs <- mget(ls(.GlobalEnv), .GlobalEnv, mode = "list", ifnotfound = list(NULL))
-
-    for (df_name in names(global_dfs)) {
-      global_df <- global_dfs[[df_name]]
-      # FIXME: Does this also work with pointer comparison?
-      if (identical(df, global_df)) {
-        df_expr <- sym(df_name)
-        break
-      }
-    }
-  }
-
-  df_cache$set(df, name)
-
-  if (is.null(df_expr)) {
-    class(df) <- setdiff(class(df), "duckplyr_df")
-    meta_record(constructive::construct_multi(list2(!!name := df)))
-  } else {
-    # Changes df in-place!
-    class(df) <- setdiff(class(df), "duckplyr_df")
-    meta_record(constructive::deparse_call(expr(!!name <- !!df_expr)))
-  }
-
-  invisible(name)
-}
-
-meta_rel_register_df <- function(rel, df) {
-  if (Sys.getenv("DUCKPLYR_META_SKIP") == "TRUE") {
-    return(invisible())
-  }
-
-  df_name <- meta_df_register(df)
-  # Expect experimental argument from outside
-  rel_expr <- expr(duckdb$rel_from_df(con, !!df_name, experimental = experimental))
-  meta_rel_register(rel, rel_expr)
-}
-
-meta_rel_register_file <- function(rel, path, table_function, options) {
-  if (Sys.getenv("DUCKPLYR_META_SKIP") == "TRUE") {
-    return(invisible())
-  }
-
-  rel_expr <- expr(
-    duckdb$rel_from_table_function(con, !!table_function, list(!!path), list(!!!options))
-  )
-  meta_rel_register(rel, rel_expr)
-}
-
-meta_rel_register <- function(rel, rel_expr) {
-  if (Sys.getenv("DUCKPLYR_META_SKIP") == "TRUE") {
-    return(invisible())
-  }
-
-  force(rel_expr)
-
-  count <- rel_cache$size()
-  name <- sym(paste0("rel", count + 1))
-
-  current_call <- meta_call_current()
-  if (!is.null(current_call)) {
-    # FIXME: This is probably too convoluted
-    meta_record(constructive::deparse_call(expr(!!current_call)))
-  }
-
-  # https://github.com/cynkra/constructive/issues/102
-  meta_record(constructive::deparse_call(expr(!!name <- !!rel_expr)))
-
-  obj <- list(rel = rel, name = name, df = df)
-  hash <- deparse(rel)
-
-  rel_cache$set(hash, obj)
-  invisible()
-}
-
-meta_rel_get <- function(rel) {
-  hash <- deparse(rel)
-
-  if (!rel_cache$has(hash)) {
-    rel_out <- paste(utils::capture.output(print(rel), type = "message"), collapse = "\n")
-    cli::cli_abort(c(
-      "duckplyr: internal: hash not found",
-      i = "hash: {hash}",
-      i = "relation: {rel_out}"
-    ))
-  }
-
-  rel_cache$get(hash)
-}
-
-new_prom_fun <- function(code) {
-  quo <- enquo(code)
-
-  valid <- FALSE
-  out <- NULL
-
-  function() {
-    if (!valid) {
-      out <<- eval_tidy(quo)
-      valid <<- TRUE
-    }
-    out
-  }
-}
diff --git a/man/config.Rd b/man/config.Rd
index f26346f74..4be3ccb84 100644
--- a/man/config.Rd
+++ b/man/config.Rd
@@ -10,9 +10,8 @@ and one option.
 \section{Options}{
 
 
-\code{duckdb.materialize_message}: Set to \code{FALSE} to turn off diagnostic output from duckdb
-on data frame materialization.
-Currenty set to \code{TRUE} when duckplyr is loaded.
+\code{duckdb.materialize_callback}: Set to a function with one argument,
+this function is called when a duckplyr data frame is materialized.
 }
 
 \section{Environment variables}{
@@ -42,12 +41,12 @@ See \link{fallback} for more options related to logging and uploading of fallbac
 }
 
 \examples{
-# options(duckdb.materialize_message = FALSE)
+# options(duckdb.materialize_callback = NULL)
 data.frame(a = 3:1) \%>\%
   as_duckplyr_df() \%>\%
   inner_join(data.frame(a = 1:4), by = "a")
 
-rlang::with_options(duckdb.materialize_message = FALSE, {
+rlang::with_options(duckdb.materialize_callback = NULL, {
   data.frame(a = 3:1) \%>\%
     as_duckplyr_df() \%>\%
     inner_join(data.frame(a = 1:4), by = "a") \%>\%
diff --git a/man/df_from_file.Rd b/man/df_from_file.Rd
index 7816c69ab..6d5a3fdde 100644
--- a/man/df_from_file.Rd
+++ b/man/df_from_file.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/io-.R, R/io-csv.R, R/io-parquet.R
+% Please edit documentation in R/io-.R, R/io-csv.R, R/io-parquet.R, R/io2-csv.R
 \name{df_from_file}
 \alias{df_from_file}
 \alias{duckplyr_df_from_file}
@@ -8,6 +8,7 @@
 \alias{df_from_parquet}
 \alias{duckplyr_df_from_parquet}
 \alias{df_to_parquet}
+\alias{read_csv_duckplyr}
 \title{Read Parquet, CSV, and other files using DuckDB}
 \usage{
 df_from_file(path, table_function, ..., options = list(), class = NULL)
@@ -29,6 +30,8 @@ df_from_parquet(path, ..., options = list(), class = NULL)
 duckplyr_df_from_parquet(path, ..., options = list(), class = NULL)
 
 df_to_parquet(data, path)
+
+read_csv_duckplyr(path, ..., lazy = TRUE, options = list(), class = NULL)
 }
 \arguments{
 \item{path}{Path to files, glob patterns \code{*} and \verb{?} are supported.}
@@ -82,6 +85,12 @@ that calls \code{as_duckplyr_df()} on the output.
 If the data frame is a \code{duckplyr_df}, the materialization occurs outside of R.
 An existing file will be overwritten.
 This function requires duckdb >= 0.10.0.
+
+These functions ingest data from a file using a table function.
+The results are transparently converted to a data frame, but the data is only read when
+the resulting data frame is actually accessed.
+
+\code{df_from_csv()} reads a CSV file using the \code{read_csv_auto()} table function.
 }
 \examples{
 # Create simple CSV file
@@ -124,4 +133,22 @@ df \%>\%
 duckplyr_df_from_parquet(path_parquet)
 
 unlink(path_parquet)
+# Create simple CSV file
+path <- tempfile("duckplyr_test_", fileext = ".csv")
+write.csv(data.frame(a = 1:3, b = letters[4:6]), path, row.names = FALSE)
+
+# Reading is immediate
+df <- df_from_csv(path)
+
+# Materialization only upon access
+names(df)
+df$a
+
+# Return as tibble, specify column types:
+df_from_file(
+  path,
+  "read_csv",
+  options = list(delim = ",", types = list(c("DOUBLE", "VARCHAR"))),
+  class = class(tibble())
+)
 }
diff --git a/man/read_duckplyr.Rd b/man/read_duckplyr.Rd
new file mode 100644
index 000000000..fe1574291
--- /dev/null
+++ b/man/read_duckplyr.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/io2-.R
+\name{read_duckplyr}
+\alias{read_duckplyr}
+\title{Read Parquet, CSV, and other files using DuckDB}
+\usage{
+read_duckplyr(
+  path,
+  table_function,
+  ...,
+  lazy = TRUE,
+  options = list(),
+  class = NULL
+)
+}
+\arguments{
+\item{path}{Path to files, glob patterns \code{*} and \verb{?} are supported.}
+
+\item{table_function}{The name of a table-valued
+DuckDB function such as \code{"read_parquet"},
+\code{"read_csv"}, \code{"read_csv_auto"} or \code{"read_json"}.}
+
+\item{...}{These dots are for future extensions and must be empty.}
+
+\item{lazy}{If \code{TRUE} (the default), \code{\link[=collect]{collect()}} must be called
+before the data can be accessed.}
+
+\item{options}{Arguments to the DuckDB function
+indicated by \code{table_function}.}
+
+\item{class}{The class of the output.
+By default, a tibble is created.
+The returned object will always be a data frame.
+Use \code{class = "data.frame"} or \code{class = character()}
+to create a plain data frame.}
+}
+\value{
+A data frame for \code{df_from_file()}, or a \code{duckplyr_df} for
+\code{duckplyr_df_from_file()}, extended by the provided \code{class}.
+}
+\description{
+\code{df_from_file()} uses arbitrary table functions to read data.
+See \url{https://duckdb.org/docs/data/overview} for a documentation
+of the available functions and their options.
+To read multiple files with the same schema,
+pass a wildcard or a character vector to the \code{path} argument,
+}