Skip to content

Commit

Permalink
Expr: Collect BindingAnalysis in bulk rather than one at a time.
Browse files Browse the repository at this point in the history
Speeds up analysis of functions with large numbers of arguments, such
as CASE statements with many branches. The prior code would call "with"
for each argument on the accumulated analysis so far, which needlessly
re-created the sets of variables over and over.
  • Loading branch information
gianm committed Jan 9, 2025
1 parent 9906544 commit b162962
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.druid.java.util.common.StringUtils;

import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.Objects;

/**
Expand Down Expand Up @@ -78,7 +79,8 @@ public String stringify()
public BindingAnalysis analyzeInputs()
{
// currently all binary operators operate on scalar inputs
return left.analyzeInputs().with(right).withScalarArguments(ImmutableSet.of(left, right));
return BindingAnalysis.collectExprs(Arrays.asList(left, right))
.withScalarArguments(ImmutableSet.of(left, right));
}

@Nullable
Expand Down
68 changes: 46 additions & 22 deletions processing/src/main/java/org/apache/druid/math/expr/Expr.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

/**
* Base interface of Druid expression language abstract syntax tree nodes. All {@link Expr} implementations are
Expand Down Expand Up @@ -576,6 +578,50 @@ private BindingAnalysis(
this.isOutputArray = isOutputArray;
}

/**
* Create an instance by combining a collection of other instances.
*/
public static BindingAnalysis collect(final Collection<BindingAnalysis> others)
{
if (others.isEmpty()) {
return EMTPY;
} else if (others.size() == 1) {
return Iterables.getOnlyElement(others);
} else {
final ImmutableSet.Builder<IdentifierExpr> freeVariables = ImmutableSet.builder();
final ImmutableSet.Builder<IdentifierExpr> scalarVariables = ImmutableSet.builder();
final ImmutableSet.Builder<IdentifierExpr> arrayVariables = ImmutableSet.builder();

boolean hasInputArrays = false;
boolean isOutputArray = false;

for (final BindingAnalysis other : others) {
hasInputArrays = hasInputArrays || other.hasInputArrays;
isOutputArray = isOutputArray || other.isOutputArray;

freeVariables.addAll(other.freeVariables);
scalarVariables.addAll(other.scalarVariables);
arrayVariables.addAll(other.arrayVariables);
}

return new BindingAnalysis(
freeVariables.build(),
scalarVariables.build(),
arrayVariables.build(),
hasInputArrays,
isOutputArray
);
}
}

/**
* Create an instance by combining a collection of analyses from {@link Expr#analyzeInputs()}.
*/
public static BindingAnalysis collectExprs(final Collection<Expr> exprs)
{
return collect(exprs.stream().map(Expr::analyzeInputs).collect(Collectors.toList()));
}

/**
* Get the list of required column inputs to evaluate an expression ({@link IdentifierExpr#binding})
*/
Expand Down Expand Up @@ -658,28 +704,6 @@ public boolean isOutputArray()
return isOutputArray;
}

/**
* Combine with {@link BindingAnalysis} from {@link Expr#analyzeInputs()}
*/
public BindingAnalysis with(Expr other)
{
return with(other.analyzeInputs());
}

/**
* Combine (union) another {@link BindingAnalysis}
*/
public BindingAnalysis with(BindingAnalysis other)
{
return new BindingAnalysis(
ImmutableSet.copyOf(Sets.union(freeVariables, other.freeVariables)),
ImmutableSet.copyOf(Sets.union(scalarVariables, other.scalarVariables)),
ImmutableSet.copyOf(Sets.union(arrayVariables, other.arrayVariables)),
hasInputArrays || other.hasInputArrays,
isOutputArray || other.isOutputArray
);
}

/**
* Add set of arguments as {@link BindingAnalysis#scalarVariables} that are *directly* {@link IdentifierExpr},
* else they are ignored.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,7 @@ public static UnsupportedOperationException cannotVectorize(String msg)
*/
public static Expr.BindingAnalysis analyzeBindings(final List<Expr> args)
{
Expr.BindingAnalysis accumulator = new Expr.BindingAnalysis();
for (final Expr arg : args) {
accumulator = accumulator.with(arg);
}
return accumulator;
return Expr.BindingAnalysis.collectExprs(args);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,11 @@ public Expr visit(Shuttle shuttle)
@Override
public BindingAnalysis analyzeInputs()
{
BindingAnalysis accumulator = new BindingAnalysis();

for (Expr arg : args) {
accumulator = accumulator.with(arg);
}
return accumulator.withScalarArguments(function.getScalarInputs(args))
.withArrayArguments(function.getArrayInputs(args))
.withArrayInputs(function.hasArrayInputs())
.withArrayOutput(function.hasArrayOutput());
return BindingAnalysis.collectExprs(args)
.withScalarArguments(function.getScalarInputs(args))
.withArrayArguments(function.getArrayInputs(args))
.withArrayInputs(function.hasArrayInputs())
.withArrayOutput(function.hasArrayOutput());
}

@Override
Expand Down Expand Up @@ -194,20 +190,16 @@ class ApplyFunctionExpr implements Expr
// apply function expressions are examined during expression selector creation, so precompute and cache the
// binding details of children
ImmutableList.Builder<BindingAnalysis> argBindingDetailsBuilder = ImmutableList.builder();
BindingAnalysis accumulator = new BindingAnalysis();
for (Expr arg : argsExpr) {
BindingAnalysis argDetails = arg.analyzeInputs();
argBindingDetailsBuilder.add(argDetails);
accumulator = accumulator.with(argDetails);
argBindingDetailsBuilder.add(arg.analyzeInputs());
}

lambdaBindingAnalysis = lambdaExpr.analyzeInputs();

bindingAnalysis = accumulator.with(lambdaBindingAnalysis)
.withArrayArguments(function.getArrayInputs(argsExpr))
.withArrayInputs(true)
.withArrayOutput(function.hasArrayOutput(lambdaExpr));
argsBindingAnalyses = argBindingDetailsBuilder.build();
bindingAnalysis = BindingAnalysis.collect(argBindingDetailsBuilder.add(lambdaBindingAnalysis).build())
.withArrayArguments(function.getArrayInputs(argsExpr))
.withArrayInputs(true)
.withArrayOutput(function.hasArrayOutput(lambdaExpr));
}

@Override
Expand Down

0 comments on commit b162962

Please sign in to comment.