fix NestedDataColumnIndexerV4 to not report cardinality
changes:
* fix an issue similar to #16489, but for NestedDataColumnIndexerV4, which could report a STRING type if it had only processed a single type of value; this should be less common than the auto indexer problem (see the sketch after this list)
* fix some issues with the SQL benchmarks
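
For reference, the heart of the indexer fix (taken from the NestedDataColumnIndexerV4 hunk further down) as a before/after sketch; globalDictionary and DimensionDictionarySelector.CARDINALITY_UNKNOWN are pre-existing Druid identifiers, not new ones:

// before: reported the size of the still-growing global dictionary, which
// downstream type inference could treat as a settled cardinality (and type)
@Override
public int getCardinality()
{
  return globalDictionary.getCardinality();
}

// after: report the sentinel so callers cannot act on a partial count
@Override
public int getCardinality()
{
  return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
}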
clintropolis committed May 28, 2024
1 parent 9d77ef0 commit 3c436fa
Showing 6 changed files with 51 additions and 43 deletions.
@@ -360,13 +360,16 @@ public void setup()

try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+  engine,
plannerFactory,
QUERIES.get(Integer.parseInt(query))
);
log.info("non-vectorized and vectorized results match");
}
-  catch (Throwable ignored) {
-    // the show must go on
+  catch (Throwable ex) {
+    log.warn(ex, "non-vectorized and vectorized results do not match");
}

final String sql = QUERIES.get(Integer.parseInt(query));

try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
@@ -378,8 +381,8 @@ public void setup()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
-  catch (JsonProcessingException ignored) {
-
+  catch (JsonProcessingException ex) {
+    log.warn(ex, "explain failed");
}

try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
@@ -393,8 +396,8 @@ public void setup()
}
log.info("Total result row count:" + rowCounter);
}
-  catch (Throwable ignored) {
-
+  catch (Throwable ex) {
+    log.warn(ex, "failed to count rows");
}
}

@@ -29,6 +29,7 @@
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
+  import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
@@ -90,6 +91,7 @@ public class SqlGroupByBenchmark
ExpressionProcessing.initializeForTests();
NestedDataModule.registerHandlersAndSerde();
}
+  private static final Logger log = new Logger(SqlGroupByBenchmark.class);

private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig()
{
@@ -349,12 +351,14 @@ public void setup()

try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+  engine,
plannerFactory,
sqlQuery(groupingDimension)
);
+  log.info("non-vectorized and vectorized results match");
}
-  catch (Throwable ignored) {
-    // the show must go on
+  catch (Throwable ex) {
+    log.warn(ex, "non-vectorized and vectorized results do not match");
}
}

@@ -20,7 +20,6 @@
package org.apache.druid.benchmark.query;

import com.fasterxml.jackson.core.JsonProcessingException;
-  import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@@ -301,7 +300,7 @@ public String getFormatString()
private SqlEngine engine;
@Nullable
private PlannerFactory plannerFactory;
-  private Closer closer = Closer.create();
+  private final Closer closer = Closer.create();

@Setup(Level.Trial)
public void setup()
@@ -345,16 +344,19 @@ public void setup()
}
final QueryableIndex index;
if ("auto".equals(schema)) {
-  List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
-      .getDimensions()
-      .stream()
-      .map(x -> new AutoTypeColumnSchema(x.getName(), null))
-      .collect(Collectors.toList());
+  Iterable<DimensionSchema> columnSchemas = Iterables.concat(
+      schemaInfo.getDimensionsSpec()
+          .getDimensions()
+          .stream()
+          .map(x -> new AutoTypeColumnSchema(x.getName(), null))
+          .collect(Collectors.toList()),
+      Collections.singletonList(new AutoTypeColumnSchema("nested", null))
+  );
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
-  DimensionsSpec.builder().setDimensions(columnSchemas).build(),
-  TransformSpec.NONE,
+  DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
+  transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE,
rowsPerSegment
@@ -368,7 +370,7 @@ public void setup()
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
-  TransformSpec.NONE,
+  transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE,
rowsPerSegment
@@ -405,12 +407,14 @@ public void setup()

try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
+  engine,
plannerFactory,
QUERIES.get(Integer.parseInt(query))
);
+  log.info("non-vectorized and vectorized results match");
}
catch (Throwable ex) {
log.warn(ex, "failed to sanity check");
log.warn(ex, "non-vectorized and vectorized results do not match");
}

final String sql = QUERIES.get(Integer.parseInt(query));
@@ -424,11 +428,8 @@ public void setup()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
-  catch (JsonMappingException e) {
-    throw new RuntimeException(e);
-  }
-  catch (JsonProcessingException e) {
-    throw new RuntimeException(e);
+  catch (JsonProcessingException ex) {
+    log.warn(ex, "explain failed");
}

try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
@@ -154,7 +154,7 @@ public StructuredData getMaxValue()
@Override
public int getCardinality()
{
-  return globalDictionary.getCardinality();
+  return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
}

@Override
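With this change, anything calling getCardinality() on the V4 indexer must tolerate the sentinel. A minimal defensive sketch, assuming a caller that sizes a data structure from the reported value (the helper class, method name, and fallback are illustrative, not part of this commit; DimensionDictionarySelector is assumed to be the org.apache.druid.segment interface referenced in the hunk above):

import org.apache.druid.segment.DimensionDictionarySelector;

public final class CardinalityFallback
{
  // Hypothetical helper: substitute a conservative default when the indexer
  // reports CARDINALITY_UNKNOWN rather than a real dictionary count.
  public static int effectiveCardinality(int reported, int fallback)
  {
    return reported == DimensionDictionarySelector.CARDINALITY_UNKNOWN ? fallback : reported;
  }
}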
@@ -69,69 +69,69 @@ public void testKeySizeEstimation()
{
NestedDataColumnIndexerV4 indexer = new NestedDataColumnIndexerV4();
int baseCardinality = NullHandling.sqlCompatible() ? 0 : 2;
-  Assert.assertEquals(baseCardinality, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality());

EncodedKeyComponent<StructuredData> key;
// new raw value, new field, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false);
Assert.assertEquals(228, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 1, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
// adding same value only adds estimated size of value itself
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false);
Assert.assertEquals(112, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 1, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality());
// new raw value, new field, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
Assert.assertEquals(94, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 2, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality());
// adding same value only adds estimated size of value itself
key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false);
Assert.assertEquals(16, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 2, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality());
// new raw value, new dictionary entry
key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false);
Assert.assertEquals(48, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 3, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 3, indexer.globalDictionary.getCardinality());

// new raw value, new fields
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false);
Assert.assertEquals(276, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
// new raw value, re-use fields and dictionary
key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false);
Assert.assertEquals(56, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
// new raw value, new fields
key = indexer.processRowValsToUnsortedEncodedKeyComponent(
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
false
);
Assert.assertEquals(286, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());
// new raw value
key = indexer.processRowValsToUnsortedEncodedKeyComponent(
ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)),
false
);
Assert.assertEquals(118, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 5, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality());

key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false);
if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(0, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
} else {
Assert.assertEquals(104, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
}

key = indexer.processRowValsToUnsortedEncodedKeyComponent(0, false);
if (NullHandling.replaceWithDefault()) {
Assert.assertEquals(16, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 6, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality());
} else {
Assert.assertEquals(48, key.getEffectiveSizeBytes());
-  Assert.assertEquals(baseCardinality + 7, indexer.getCardinality());
+  Assert.assertEquals(baseCardinality + 7, indexer.globalDictionary.getCardinality());
}
}
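
Note that the assertions above now target indexer.globalDictionary directly, since getCardinality() itself returns the sentinel unconditionally after this change. A one-line check of that new external contract (implied by the production change, though not asserted in this commit) would be:

Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality());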

@@ -178,10 +178,10 @@ public SqlVectorizedExpressionSanityTest(String query)
@Test
public void testQuery()
{
-  sanityTestVectorizedSqlQueries(PLANNER_FACTORY, query);
+  sanityTestVectorizedSqlQueries(ENGINE, PLANNER_FACTORY, query);
}

-  public static void sanityTestVectorizedSqlQueries(PlannerFactory plannerFactory, String query)
+  public static void sanityTestVectorizedSqlQueries(SqlEngine engine, PlannerFactory plannerFactory, String query)
{
final Map<String, Object> vector = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, "force",
@@ -193,8 +193,8 @@ public static void sanityTestVectorizedSqlQueries(PlannerFactory plannerFactory,
);

try (
-  final DruidPlanner vectorPlanner = plannerFactory.createPlannerForTesting(ENGINE, query, vector);
-  final DruidPlanner nonVectorPlanner = plannerFactory.createPlannerForTesting(ENGINE, query, nonvector)
+  final DruidPlanner vectorPlanner = plannerFactory.createPlannerForTesting(engine, query, vector);
+  final DruidPlanner nonVectorPlanner = plannerFactory.createPlannerForTesting(engine, query, nonvector)
) {
final PlannerResult vectorPlan = vectorPlanner.plan();
final PlannerResult nonVectorPlan = nonVectorPlanner.plan();
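With the engine threaded through as an explicit parameter, callers outside this test class supply their own SqlEngine rather than the class's static ENGINE; the call shape used by the benchmarks above is:

SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
    engine,
    plannerFactory,
    sql
);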
