Skip to content

Commit

Permalink
perf: Comparison expressions are translated in a way that allows them…
Browse files Browse the repository at this point in the history
… to be pushed down to Parquet (#270)

Co-authored-by: Kirill Müller <[email protected]>
  • Loading branch information
toppyy and krlmlr authored Dec 14, 2024
1 parent 0b01107 commit 41c5dd2
Show file tree
Hide file tree
Showing 89 changed files with 916 additions and 764 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ export(rel_set_intersect)
export(rel_set_symdiff)
export(rel_to_df)
export(rel_union_all)
export(relexpr_comparison)
export(relexpr_constant)
export(relexpr_function)
export(relexpr_reference)
Expand Down
2 changes: 1 addition & 1 deletion R/distinct.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ distinct.duckplyr_df <- function(.data, ..., .keep_all = FALSE) {

expr_filter <- rel_translate(
quo(`___row_number_by` == 1L),
names_data = "___row_number_by"
tibble(`___row_number_by` = 1L)
)
out_rel <- rel_filter(rel, list(expr_filter))

Expand Down
8 changes: 7 additions & 1 deletion R/mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,13 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used",
names_new <- c(names_new, new)

new_pos <- match(new, names(current_data), nomatch = length(current_data) + j)
new_expr <- rel_translate(quo, names_data = names(current_data), alias = new, partition = by_names, need_window = TRUE)
new_expr <- rel_translate(
quo,
current_data,
alias = new,
partition = by_names,
need_window = TRUE
)
exprs[[new_pos]] <- new_expr

new_names_used <- intersect(attr(new_expr, "used"), names(.data))
Expand Down
18 changes: 18 additions & 0 deletions R/relational-duckdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,13 @@ to_duckdb_expr <- function(x) {
}
out
},
relational_relexpr_comparison = {
out <- duckdb$expr_comparison(x$cmp_op, to_duckdb_exprs(x$exprs))
if (!is.null(x$alias)) {
duckdb$expr_set_alias(out, x$alias)
}
out
},
relational_relexpr_function = {
out <- duckdb$expr_function(x$name, to_duckdb_exprs(x$args))
if (!is.null(x$alias)) {
Expand Down Expand Up @@ -468,6 +475,17 @@ to_duckdb_expr_meta <- function(x) {
}
out
},
relational_relexpr_comparison = {
out <- expr(duckdb$expr_comparison(!!x$cmp_op, list(!!!to_duckdb_exprs_meta(x$exprs))))
if (!is.null(x$alias)) {
out <- expr({
tmp_expr <- !!out
duckdb$expr_set_alias(tmp_expr, !!x$alias)
tmp_expr
})
}
out
},
relational_relexpr_function = {
meta_macro_register(x$name)
out <- expr(duckdb$expr_function(!!x$name, list(!!!to_duckdb_exprs_meta(x$args))))
Expand Down
16 changes: 16 additions & 0 deletions R/relational-expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,22 @@ relexpr_function <- function(name, args, alias = NULL) {
new_relexpr(list(name = name, args = args, alias = alias), class = "relational_relexpr_function")
}

#' relexpr_comparison
#'
#' `relexpr_comparison()` wraps a comparison expression.
#' The result can optionally carry an alias, in the same way as
#' `relexpr_function()`.
#'
#' @param cmp_op Comparison operator, e.g., `"<"` or `"=="`.
#' @param exprs Expressions to compare, a list of `expr` objects.
#' @rdname new_relexpr
#' @return an object of class `"relational_relexpr"`
#' @export
relexpr_comparison <- function(cmp_op, exprs, alias = NULL) {
  # NOTE(review): `alias` is presumably already documented in the shared
  # `new_relexpr` help topic (via `relexpr_function()`) -- confirm, and add
  # a `@param alias` tag here if it is not.
  stopifnot(is_string(cmp_op))
  stopifnot(is.list(exprs))
  # The alias is optional; when given it must be a single string, because
  # the DuckDB translation forwards it to `expr_set_alias()` whenever
  # `x$alias` is not NULL.
  stopifnot(is.null(alias) || is_string(alias))
  new_relexpr(
    list(cmp_op = cmp_op, exprs = exprs, alias = alias),
    class = "relational_relexpr_comparison"
  )
}


#' relexpr_window
#'
#' `relexpr_window()` applies a function over a window,
Expand Down
4 changes: 2 additions & 2 deletions R/tpch_raw_01.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ tpch_raw_01 <- function(con, experimental) {
rel3 <- duckdb$rel_filter(
rel2,
list(
duckdb$expr_function(
"r_base::<=",
duckdb$expr_comparison(
"<=",
list(
duckdb$expr_reference("l_shipdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down
8 changes: 4 additions & 4 deletions R/tpch_raw_02.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ tpch_raw_02 <- function(con, experimental) {
rel5 <- duckdb$rel_filter(
rel4,
list(
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("p_size"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down Expand Up @@ -319,8 +319,8 @@ tpch_raw_02 <- function(con, experimental) {
rel19 <- duckdb$rel_filter(
rel18,
list(
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("r_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down
12 changes: 6 additions & 6 deletions R/tpch_raw_03.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ tpch_raw_03 <- function(con, experimental) {
rel3 <- duckdb$rel_filter(
rel2,
list(
duckdb$expr_function(
"r_base::<",
duckdb$expr_comparison(
"<",
list(
duckdb$expr_reference("o_orderdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down Expand Up @@ -69,8 +69,8 @@ tpch_raw_03 <- function(con, experimental) {
rel6 <- duckdb$rel_filter(
rel5,
list(
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("c_mktsegment"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down Expand Up @@ -186,8 +186,8 @@ tpch_raw_03 <- function(con, experimental) {
rel14 <- duckdb$rel_filter(
rel13,
list(
duckdb$expr_function(
"r_base::>",
duckdb$expr_comparison(
">",
list(
duckdb$expr_reference("l_shipdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down
12 changes: 6 additions & 6 deletions R/tpch_raw_04.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ tpch_raw_04 <- function(con, experimental) {
rel3 <- duckdb$rel_filter(
rel2,
list(
duckdb$expr_function(
"r_base::<",
duckdb$expr_comparison(
"<",
list(duckdb$expr_reference("l_commitdate"), duckdb$expr_reference("l_receiptdate"))
)
)
Expand Down Expand Up @@ -73,8 +73,8 @@ tpch_raw_04 <- function(con, experimental) {
rel7 <- duckdb$rel_filter(
rel6,
list(
duckdb$expr_function(
"r_base::>=",
duckdb$expr_comparison(
">=",
list(
duckdb$expr_reference("o_orderdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -84,8 +84,8 @@ tpch_raw_04 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::<",
duckdb$expr_comparison(
"<",
list(
duckdb$expr_reference("o_orderdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down
12 changes: 6 additions & 6 deletions R/tpch_raw_05.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ tpch_raw_05 <- function(con, experimental) {
rel5 <- duckdb$rel_filter(
rel4,
list(
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("r_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down Expand Up @@ -313,8 +313,8 @@ tpch_raw_05 <- function(con, experimental) {
rel26 <- duckdb$rel_filter(
rel25,
list(
duckdb$expr_function(
"r_base::>=",
duckdb$expr_comparison(
">=",
list(
duckdb$expr_reference("o_orderdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -324,8 +324,8 @@ tpch_raw_05 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::<",
duckdb$expr_comparison(
"<",
list(
duckdb$expr_reference("o_orderdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down
20 changes: 10 additions & 10 deletions R/tpch_raw_06.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ tpch_raw_06 <- function(con, experimental) {
rel3 <- duckdb$rel_filter(
rel2,
list(
duckdb$expr_function(
"r_base::>=",
duckdb$expr_comparison(
">=",
list(
duckdb$expr_reference("l_shipdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -44,8 +44,8 @@ tpch_raw_06 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::<",
duckdb$expr_comparison(
"<",
list(
duckdb$expr_reference("l_shipdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -55,8 +55,8 @@ tpch_raw_06 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::>=",
duckdb$expr_comparison(
">=",
list(
duckdb$expr_reference("l_discount"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -66,8 +66,8 @@ tpch_raw_06 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::<=",
duckdb$expr_comparison(
"<=",
list(
duckdb$expr_reference("l_discount"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -77,8 +77,8 @@ tpch_raw_06 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::<",
duckdb$expr_comparison(
"<",
list(
duckdb$expr_reference("l_quantity"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down
40 changes: 20 additions & 20 deletions R/tpch_raw_07.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ tpch_raw_07 <- function(con, experimental) {
duckdb$expr_function(
"|",
list(
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("n1_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -56,8 +56,8 @@ tpch_raw_07 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("n1_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down Expand Up @@ -171,8 +171,8 @@ tpch_raw_07 <- function(con, experimental) {
duckdb$expr_function(
"|",
list(
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("n2_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -182,8 +182,8 @@ tpch_raw_07 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("n2_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down Expand Up @@ -366,8 +366,8 @@ tpch_raw_07 <- function(con, experimental) {
rel30 <- duckdb$rel_filter(
rel29,
list(
duckdb$expr_function(
"r_base::>=",
duckdb$expr_comparison(
">=",
list(
duckdb$expr_reference("l_shipdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -377,8 +377,8 @@ tpch_raw_07 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::<=",
duckdb$expr_comparison(
"<=",
list(
duckdb$expr_reference("l_shipdate"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down Expand Up @@ -541,8 +541,8 @@ tpch_raw_07 <- function(con, experimental) {
duckdb$expr_function(
"&",
list(
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("n1_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -552,8 +552,8 @@ tpch_raw_07 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("n2_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -568,8 +568,8 @@ tpch_raw_07 <- function(con, experimental) {
duckdb$expr_function(
"&",
list(
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("n1_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand All @@ -579,8 +579,8 @@ tpch_raw_07 <- function(con, experimental) {
}
)
),
duckdb$expr_function(
"r_base::==",
duckdb$expr_comparison(
"==",
list(
duckdb$expr_reference("n2_name"),
if ("experimental" %in% names(formals(duckdb$expr_constant))) {
Expand Down
Loading

0 comments on commit 41c5dd2

Please sign in to comment.