From 6f9ed5304722026331bdbbfb412d50b6023beb21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Mon, 11 Nov 2024 06:32:40 +0100 Subject: [PATCH 1/3] SNAPSHOT --- R/as_duckplyr_tibble.R | 1 + R/compute.R | 37 ++++++++++++++++- R/handle_desc.R | 42 ++++++++++++++----- R/mutate.R | 14 +++++-- R/relational-expr.R | 1 + R/translate.R | 93 +++++++++++++++++++++++++----------------- R/unique_table_name.R | 6 +++ 7 files changed, 141 insertions(+), 53 deletions(-) create mode 100644 R/unique_table_name.R diff --git a/R/as_duckplyr_tibble.R b/R/as_duckplyr_tibble.R index dcb306469..e91c313f4 100644 --- a/R/as_duckplyr_tibble.R +++ b/R/as_duckplyr_tibble.R @@ -1,6 +1,7 @@ #' as_duckplyr_tibble #' #' `as_duckplyr_tibble()` converts the input to a tibble and then to a duckplyr data frame. +#' This function also accepts \pkg{dbplyr} lazy tables. #' #' @return For `as_duckplyr_tibble()`, an object of class #' `c("duckplyr_df", class(tibble()))` . diff --git a/R/compute.R b/R/compute.R index 31287154f..df148d6fd 100644 --- a/R/compute.R +++ b/R/compute.R @@ -1,10 +1,43 @@ # Generated by 02-duckplyr_df-methods.R #' @export -compute.duckplyr_df <- function(x, ...) 
{ +compute.duckplyr_df <- function(x, ..., name = NULL, temporary = TRUE) { + if (is.null(name)) { + if (!isTRUE(temporary)) { + cli::cli_abort("{.arg name} must be provided if {.arg temporary} is {.val {FALSE}}.") + } + name <- unique_table_name() + } + stopifnot(!is.null(name) || !isTRUE(temporary)) + + con <- get_default_duckdb_connection() + quoted <- DBI::dbQuoteIdentifier(con, name) + unquoted <- DBI::dbUnquoteIdentifier(con, quoted)[[1]] + if (length(unquoted) == 1) { + schema <- "" + table <- unquoted@name[[1]] + } else if (length(unquoted) == 2) { + schema <- unquoted@name[[1]] + table <- unquoted@name[[2]] + } else { + cli::cli_abort('{.arg name} must be either a string or of the form {.code SQL("schema.table")}') + } + # Our implementation rel_try(NULL, - "No relational implementation for compute()" = TRUE, { + browser() + sql <- paste0( + "CREATE ", + if (isTRUE(temporary)) "TEMPORARY ", + "TABLE ", + quoted, + " AS FROM _" + ) + rel <- duckdb_rel_from_df(x) + duckdb$rel_sql(rel, sql) + out_rel <- duckdb$rel_from_table(con, table_name = table, schema_name = schema) + out <- rel_to_df(out_rel) + out <- dplyr_reconstruct(out, x) return(out) } ) diff --git a/R/handle_desc.R b/R/handle_desc.R index fc5e15a97..1078b763c 100644 --- a/R/handle_desc.R +++ b/R/handle_desc.R @@ -1,23 +1,43 @@ # Used in arrange() +# Handles calls to 'desc' function by +# - extracting the sort order +# - removing any desc-function calls from the expressions: desc(colname) -> colname handle_desc <- function(dots) { - # Handles calls to 'desc' function by - # - extracting the sort order - # - removing any desc-function calls from the expressions: desc(colname) -> colname ascending <- rep(TRUE, length(dots)) for (i in seq_along(dots)) { expr <- quo_get_expr(dots[[i]]) + env <- quo_get_env(dots[[i]]) - if (!is.call(expr)) next - if (expr[[1]] != "desc") next + if (is_desc(expr, env)) { + ascending[[i]] <- FALSE + dots[[i]] <- new_quosure(expr[[2]], env = env) + } + } - # Check that 
desc is called with a single argument - # (dplyr::desc() accepts only one argument) - if (length(expr) > 2) cli::cli_abort("`desc()` must be called with exactly one argument.") + list(dots = dots, ascending = ascending) +} - ascending[i] <- FALSE - dots[[i]] <- new_quosure(expr[[2]], env = quo_get_env(dots[[i]])) +is_desc <- function(expr, env) { + if (!is.call(expr)) { + return(FALSE) } - list(dots = dots, ascending = ascending) + if (expr[[1]] == "desc") { + if (!identical(eval(expr[[1]], env), dplyr::desc)) { + return(FALSE) + } + } else if (expr[[1]] == "::") { + if (expr[[2]] != "dplyr" && expr[[2]] != "duckplyr") { + return(FALSE) + } + } else { + return(FALSE) + } + + if (length(expr) > 2) { + cli::cli_abort("{.fun desc} must be called with exactly one argument.") + } + + TRUE } diff --git a/R/mutate.R b/R/mutate.R index 3d6da1141..1f271cb19 100644 --- a/R/mutate.R +++ b/R/mutate.R @@ -12,7 +12,9 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used", { rel <- duckdb_rel_from_df(.data) - if (length(by_names) > 0) { + need_oo <- (length(by_names) > 0) + + if (need_oo) { rel <- oo_prep(rel) } @@ -55,8 +57,14 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used", names_new <- c(names_new, new) - new_pos <- match(new, names(current_data), nomatch = length(current_data) + j) new_expr <- rel_translate(quo, names_data = names(current_data), alias = new, partition = by_names, need_window = TRUE) + if (!need_oo && isTRUE(attr(new_expr, "reorder"))) { + rel <- oo_prep(rel) + need_oo <- TRUE + names_out <- rel_names(rel) + } + + new_pos <- match(new, names(current_data), nomatch = length(current_data) + j) exprs[[new_pos]] <- new_expr new_names_used <- intersect(attr(new_expr, "used"), names(.data)) @@ -67,7 +75,7 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used", current_data <- rel_to_df(rel) } - if (length(by_names) > 0) { + if (need_oo) { rel <- oo_restore(rel) } diff --git 
a/R/relational-expr.R b/R/relational-expr.R index a0c8065e6..43d682ae6 100644 --- a/R/relational-expr.R +++ b/R/relational-expr.R @@ -101,6 +101,7 @@ relexpr_window <- function( stopifnot(is.null(offset_expr) || inherits(offset_expr, "relational_relexpr")) stopifnot(is.null(default_expr) || inherits(default_expr, "relational_relexpr")) stopifnot(is.null(alias) || is_string(alias)) + new_relexpr( list( expr = expr, diff --git a/R/translate.R b/R/translate.R index 05edc3675..7c7f99e69 100644 --- a/R/translate.R +++ b/R/translate.R @@ -221,27 +221,9 @@ rel_translate_lang <- function( } ) - aliases <- c( - sd = "stddev", - first = "first_value", - last = "last_value", - nth = "nth_value", - "/" = "___divide", - "log10" = "___log10", - "log" = "___log", - "as.integer" = "r_base::as.integer", - "<" = "r_base::<", - "<=" = "r_base::<=", - ">" = "r_base::>", - ">=" = "r_base::>=", - "==" = "r_base::==", - "!=" = "r_base::!=", - NULL - ) - known_window <- c( # Window functions - "rank", "dense_rank", "percent_rank", + "min_rank", "dense_rank", "percent_rank", "row_number", "first", "last", "nth", "cume_dist", "lead", "lag", "ntile", @@ -253,14 +235,6 @@ rel_translate_lang <- function( window <- need_window && (name %in% known_window) - if (name %in% names(aliases)) { - name <- aliases[[name]] - if (grepl("^r_base::", name)) { - meta_ext_register() - } - } - # name <- aliases[name] %|% name - order_bys <- list() offset_expr <- NULL default_expr <- NULL @@ -280,6 +254,15 @@ rel_translate_lang <- function( order_bys <- list(do_translate(expr$order_by, in_window = TRUE)) expr$order_by <- NULL } + } else if (name %in% c("row_number", "min_rank", "dense_rank")) { + if (name == "row_number" && length(expr) == 1) { + # Fallthrough + } else if (length(expr) == 2 && is.name(expr[[2]])) { + order_bys <- list(do_translate(expr[[2]], in_window = TRUE)) + expr <- list(expr[[1]]) + } else { + cli::cli_abort("{.fun {name}} can only be translated if it uses column names as arguments") + } } 
args <- map(as.list(expr[-1]), do_translate, in_window = in_window || window) @@ -290,6 +273,33 @@ rel_translate_lang <- function( } } + # Aliasing comes last: + aliases <- c( + sd = "stddev", + first = "first_value", + last = "last_value", + nth = "nth_value", + min_rank = "rank", + "/" = "___divide", + log10 = "___log10", + log = "___log", + as.integer = "r_base::as.integer", + "<" = "r_base::<", + "<=" = "r_base::<=", + ">" = "r_base::>", + ">=" = "r_base::>=", + "==" = "r_base::==", + "!=" = "r_base::!=", + NULL + ) + + if (name %in% names(aliases)) { + name <- aliases[[name]] + if (grepl("^r_base::", name)) { + meta_ext_register() + } + } + fun <- relexpr_function(name, args) if (window) { partitions <- map(partition, relexpr_reference) @@ -325,6 +335,7 @@ rel_translate <- function( } used <- character() + reorder <- FALSE do_translate <- function(expr, in_window = FALSE, top_level = FALSE) { stopifnot(!is_quosure(expr)) @@ -351,15 +362,23 @@ rel_translate <- function( } }, # - language = rel_translate_lang( - expr, - do_translate, - names_data, - env, - partition, - in_window, - need_window - ), + language = { + lang <- rel_translate_lang( + expr, + do_translate, + names_data, + env, + partition, + in_window, + need_window + ) + + if (inherits(lang, "relational_relexpr_window") && length(lang$order_bys) > 0) { + used <<- unique(c(used, map_chr(lang$order_bys, ~ .x$name))) + reorder <<- TRUE + } + lang + }, # cli::cli_abort("Internal: Unknown type {.val {typeof(expr)}}") ) @@ -371,5 +390,5 @@ rel_translate <- function( out <- relexpr_set_alias(out, alias) } - structure(out, used = used) + structure(out, used = used, reorder = reorder) } diff --git a/R/unique_table_name.R b/R/unique_table_name.R new file mode 100644 index 000000000..42130925a --- /dev/null +++ b/R/unique_table_name.R @@ -0,0 +1,6 @@ +# From dbplyr +unique_table_name <- function(prefix = "") { + vals <- c(letters, LETTERS, 0:9) + name <- paste0(sample(vals, 10, replace = TRUE), collapse = "") + 
paste0(prefix, "duckplyr_", name) +} From 30c1440450f9e78db2dc4e0a6205f8912706808e Mon Sep 17 00:00:00 2001 From: krlmlr Date: Fri, 22 Nov 2024 01:57:36 +0000 Subject: [PATCH 2/3] chore: Auto-update from GitHub Actions Run: https://github.com/tidyverse/duckplyr/actions/runs/11964996619 --- man/as_duckplyr_df.Rd | 1 + 1 file changed, 1 insertion(+) diff --git a/man/as_duckplyr_df.Rd b/man/as_duckplyr_df.Rd index d7458df27..bae85bc5c 100644 --- a/man/as_duckplyr_df.Rd +++ b/man/as_duckplyr_df.Rd @@ -30,6 +30,7 @@ and will fail for any other classes, including subclasses of \code{"data.frame"} This behavior is likely to change, do not rely on it. \code{as_duckplyr_tibble()} converts the input to a tibble and then to a duckplyr data frame. +This function also accepts \pkg{dbplyr} lazy tables. } \details{ Set the \code{DUCKPLYR_FALLBACK_INFO} and \code{DUCKPLYR_FORCE} environment variables From fd2c8b2e598e056dbdcbee9244371b21088f62f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 28 Nov 2024 11:47:09 +0100 Subject: [PATCH 3/3] TO BE SORTED OUT --- R/compute.R | 1 - R/meta.R | 281 ------------------------------------------- man/config.Rd | 9 +- man/df_from_file.Rd | 29 ++++- man/read_duckplyr.Rd | 47 ++++++++ 5 files changed, 79 insertions(+), 288 deletions(-) create mode 100644 man/read_duckplyr.Rd diff --git a/R/compute.R b/R/compute.R index df148d6fd..c3be5fc29 100644 --- a/R/compute.R +++ b/R/compute.R @@ -25,7 +25,6 @@ compute.duckplyr_df <- function(x, ..., name = NULL, temporary = TRUE) { # Our implementation rel_try(NULL, { - browser() sql <- paste0( "CREATE ", if (isTRUE(temporary)) "TEMPORARY ", diff --git a/R/meta.R b/R/meta.R index d164dc408..e69de29bb 100644 --- a/R/meta.R +++ b/R/meta.R @@ -1,281 +0,0 @@ -call_stack <- collections::stack() -pre_code_cache <- collections::queue() -code_cache <- collections::queue() -ext_cache <- collections::dict() -macro_cache <- collections::dict() -df_cache <- collections::dict() -rel_cache <- 
collections::dict() - -meta_call <- function(name) { - meta_call_start(name) - withr::defer_parent(meta_call_end()) -} - -meta_call_start <- function(name) { - call_stack$push(name) -} - -meta_call_end <- function() { - call_stack$pop() -} - -meta_call_current <- function() { - if (call_stack$size() == 0) { - return(NULL) - } - call_stack$peek() -} - -meta_clear <- function() { - pre_code_cache$clear() - code_cache$clear() - ext_cache$clear() - macro_cache$clear() - df_cache$clear() - rel_cache$clear() -} - -meta_pre_record <- function(call) { - pre_code_cache$push(new_prom_fun({{ call }})) - invisible() -} - -meta_record <- function(call) { - code_cache$push(new_prom_fun({{ call }})) - invisible() -} - -meta_replay <- function(add_pre_code = TRUE) { - if (add_pre_code) { - con_exprs <- list( - expr(duckdb <- asNamespace("duckdb")), - expr(drv <- duckdb::duckdb()), - expr(con <- DBI::dbConnect(drv)), - expr(experimental <- !!(Sys.getenv("DUCKPLYR_EXPERIMENTAL") == "TRUE")) - ) - con_code <- map(con_exprs, constructive::deparse_call) - pre_code <- c( - con_code, - map(pre_code_cache$as_list(), ~ .x()) - ) - } else { - pre_code <- NULL - } - - # HACK - count <- rel_cache$size() - res_name <- sym(paste0("rel", count)) - res_mat_expr <- expr(duckdb$rel_to_altrep(!!res_name)) - res_code <- map(list(res_name, res_mat_expr), constructive::deparse_call) - - all_code <- c( - pre_code, - map(code_cache$as_list(), ~ .x()), - res_code - ) - - walk(all_code, print) -} - -meta_replay_to_fun_code <- function() { - code <- utils::capture.output(meta_replay(add_pre_code = FALSE)) - code <- c( - paste0("function(con, experimental) {"), - paste0(" ", code), - "}" - ) - - # Trailing newline - paste0(code, "\n", collapse = "") -} - -meta_replay_to_fun <- function(text = meta_replay_to_fun_code()) { - eval(parse(text = text)[[1]]) -} - -meta_replay_to_fun_file <- function(name) { - code <- paste0( - "# Generated by meta_replay_to_fun_file(), do not edit by hand\n", name, " <- ", - 
meta_replay_to_fun_code() - ) - - path <- file.path("R", paste0(name, ".R")) - - brio::write_file(code, path) -} - -meta_replay_to_file <- function(path, extra = character()) { - code <- utils::capture.output(meta_replay()) - writeLines(c(extra, code), path) -} - -meta_replay_to_new_doc <- function() { - code <- utils::capture.output(meta_replay()) - rstudioapi::documentNew(code, execute = TRUE) -} - -meta_replay_to_reprex <- function(...) { - code <- utils::capture.output(meta_replay()) - reprex::reprex(input = code, ...) -} - -meta_eval <- function() { - code <- utils::capture.output(meta_replay()) - eval(parse(text = code)) -} - -meta_ext_register <- function(name = "rfuns") { - if (ext_cache$has(name)) { - return(invisible()) - } - - stopifnot(identical(name, "rfuns")) - - ext_install_expr <- expr(invisible( - duckdb$rapi_load_rfuns(drv@database_ref) - )) - meta_pre_record(constructive::deparse_call(ext_install_expr)) - - ext_cache$set(name, TRUE) - invisible() -} - -meta_macro_register <- function(name) { - macro <- duckplyr_macros[name] - if (is.na(macro)) { - return(invisible()) - } - - if (macro_cache$has(name)) { - return(invisible()) - } - - # Register functions from the rfuns extension - # Can't use '^"r_' because of the way the macro is defined - if (grepl('"r_', macro)) { - meta_ext_register() - } - - macro_expr <- expr(invisible( - DBI::dbExecute(con, !!paste0('CREATE MACRO "', names(macro), '"', macro)) - )) - meta_pre_record(constructive::deparse_call(macro_expr)) - - macro_cache$set(name, TRUE) - invisible() -} - -meta_df_register <- function(df) { - if (df_cache$has(df)) { - return(invisible(df_cache$get(df))) - } - - count <- df_cache$size() - name <- sym(paste0("df", count + 1)) - - df_expr <- NULL - - if (Sys.getenv("DUCKPLYR_META_GLOBAL") == "TRUE") { - global_dfs <- mget(ls(.GlobalEnv), .GlobalEnv, mode = "list", ifnotfound = list(NULL)) - - for (df_name in names(global_dfs)) { - global_df <- global_dfs[[df_name]] - # FIXME: Does this also 
work with pointer comparison? - if (identical(df, global_df)) { - df_expr <- sym(df_name) - break - } - } - } - - df_cache$set(df, name) - - if (is.null(df_expr)) { - class(df) <- setdiff(class(df), "duckplyr_df") - meta_record(constructive::construct_multi(list2(!!name := df))) - } else { - # Changes df in-place! - class(df) <- setdiff(class(df), "duckplyr_df") - meta_record(constructive::deparse_call(expr(!!name <- !!df_expr))) - } - - invisible(name) -} - -meta_rel_register_df <- function(rel, df) { - if (Sys.getenv("DUCKPLYR_META_SKIP") == "TRUE") { - return(invisible()) - } - - df_name <- meta_df_register(df) - # Expect experimental argument from outside - rel_expr <- expr(duckdb$rel_from_df(con, !!df_name, experimental = experimental)) - meta_rel_register(rel, rel_expr) -} - -meta_rel_register_file <- function(rel, path, table_function, options) { - if (Sys.getenv("DUCKPLYR_META_SKIP") == "TRUE") { - return(invisible()) - } - - rel_expr <- expr( - duckdb$rel_from_table_function(con, !!table_function, list(!!path), list(!!!options)) - ) - meta_rel_register(rel, rel_expr) -} - -meta_rel_register <- function(rel, rel_expr) { - if (Sys.getenv("DUCKPLYR_META_SKIP") == "TRUE") { - return(invisible()) - } - - force(rel_expr) - - count <- rel_cache$size() - name <- sym(paste0("rel", count + 1)) - - current_call <- meta_call_current() - if (!is.null(current_call)) { - # FIXME: This is probably too convoluted - meta_record(constructive::deparse_call(expr(!!current_call))) - } - - # https://github.com/cynkra/constructive/issues/102 - meta_record(constructive::deparse_call(expr(!!name <- !!rel_expr))) - - obj <- list(rel = rel, name = name, df = df) - hash <- deparse(rel) - - rel_cache$set(hash, obj) - invisible() -} - -meta_rel_get <- function(rel) { - hash <- deparse(rel) - - if (!rel_cache$has(hash)) { - rel_out <- paste(utils::capture.output(print(rel), type = "message"), collapse = "\n") - cli::cli_abort(c( - "duckplyr: internal: hash not found", - i = "hash: 
{hash}", - i = "relation: {rel_out}" - )) - } - - rel_cache$get(hash) -} - -new_prom_fun <- function(code) { - quo <- enquo(code) - - valid <- FALSE - out <- NULL - - function() { - if (!valid) { - out <<- eval_tidy(quo) - valid <<- TRUE - } - out - } -} diff --git a/man/config.Rd b/man/config.Rd index f26346f74..4be3ccb84 100644 --- a/man/config.Rd +++ b/man/config.Rd @@ -10,9 +10,8 @@ and one option. \section{Options}{ -\code{duckdb.materialize_message}: Set to \code{FALSE} to turn off diagnostic output from duckdb -on data frame materialization. -Currenty set to \code{TRUE} when duckplyr is loaded. +\code{duckdb.materialize_callback}: Set to a function with one argument, +this function is called when a duckplyr data frame is materialized. } \section{Environment variables}{ @@ -42,12 +41,12 @@ See \link{fallback} for more options related to logging and uploading of fallbac } \examples{ -# options(duckdb.materialize_message = FALSE) +# options(duckdb.materialize_callback = NULL) data.frame(a = 3:1) \%>\% as_duckplyr_df() \%>\% inner_join(data.frame(a = 1:4), by = "a") -rlang::with_options(duckdb.materialize_message = FALSE, { +rlang::with_options(duckdb.materialize_callback = NULL, { data.frame(a = 3:1) \%>\% as_duckplyr_df() \%>\% inner_join(data.frame(a = 1:4), by = "a") \%>\% diff --git a/man/df_from_file.Rd b/man/df_from_file.Rd index 7816c69ab..6d5a3fdde 100644 --- a/man/df_from_file.Rd +++ b/man/df_from_file.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/io-.R, R/io-csv.R, R/io-parquet.R +% Please edit documentation in R/io-.R, R/io-csv.R, R/io-parquet.R, R/io2-csv.R \name{df_from_file} \alias{df_from_file} \alias{duckplyr_df_from_file} @@ -8,6 +8,7 @@ \alias{df_from_parquet} \alias{duckplyr_df_from_parquet} \alias{df_to_parquet} +\alias{read_csv_duckplyr} \title{Read Parquet, CSV, and other files using DuckDB} \usage{ df_from_file(path, table_function, ..., options = list(), class = NULL) @@ -29,6 +30,8 @@ 
df_from_parquet(path, ..., options = list(), class = NULL) duckplyr_df_from_parquet(path, ..., options = list(), class = NULL) df_to_parquet(data, path) + +read_csv_duckplyr(path, ..., lazy = TRUE, options = list(), class = NULL) } \arguments{ \item{path}{Path to files, glob patterns \code{*} and \verb{?} are supported.} @@ -82,6 +85,12 @@ that calls \code{as_duckplyr_df()} on the output. If the data frame is a \code{duckplyr_df}, the materialization occurs outside of R. An existing file will be overwritten. This function requires duckdb >= 0.10.0. + +These functions ingest data from a file using a table function. +The results are transparently converted to a data frame, but the data is only read when +the resulting data frame is actually accessed. + +\code{df_from_csv()} reads a CSV file using the \code{read_csv_auto()} table function. } \examples{ # Create simple CSV file @@ -124,4 +133,22 @@ df \%>\% duckplyr_df_from_parquet(path_parquet) unlink(path_parquet) +# Create simple CSV file +path <- tempfile("duckplyr_test_", fileext = ".csv") +write.csv(data.frame(a = 1:3, b = letters[4:6]), path, row.names = FALSE) + +# Reading is immediate +df <- df_from_csv(path) + +# Materialization only upon access +names(df) +df$a + +# Return as tibble, specify column types: +df_from_file( + path, + "read_csv", + options = list(delim = ",", types = list(c("DOUBLE", "VARCHAR"))), + class = class(tibble()) +) } diff --git a/man/read_duckplyr.Rd b/man/read_duckplyr.Rd new file mode 100644 index 000000000..fe1574291 --- /dev/null +++ b/man/read_duckplyr.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/io2-.R +\name{read_duckplyr} +\alias{read_duckplyr} +\title{Read Parquet, CSV, and other files using DuckDB} +\usage{ +read_duckplyr( + path, + table_function, + ..., + lazy = TRUE, + options = list(), + class = NULL +) +} +\arguments{ +\item{path}{Path to files, glob patterns \code{*} and \verb{?} are supported.} + 
+\item{table_function}{The name of a table-valued +DuckDB function such as \code{"read_parquet"}, +\code{"read_csv"}, \code{"read_csv_auto"} or \code{"read_json"}.} + +\item{...}{These dots are for future extensions and must be empty.} + +\item{lazy}{If \code{TRUE} (the default), \code{\link[=collect]{collect()}} must be called +before the data can be accessed.} + +\item{options}{Arguments to the DuckDB function +indicated by \code{table_function}.} + +\item{class}{The class of the output. +By default, a tibble is created. +The returned object will always be a data frame. +Use \code{class = "data.frame"} or \code{class = character()} +to create a plain data frame.} +} +\value{ +A data frame for \code{df_from_file()}, or a \code{duckplyr_df} for +\code{duckplyr_df_from_file()}, extended by the provided \code{class}. +} +\description{ +\code{df_from_file()} uses arbitrary table functions to read data. +See \url{https://duckdb.org/docs/data/overview} for documentation +of the available functions and their options. +To read multiple files with the same schema, +pass a wildcard or a character vector to the \code{path} argument. +}