Skip to content

Commit

Permalink
add SYNTHETIC ColumnMetadata.Kind to represent the score column
Browse files Browse the repository at this point in the history
  • Loading branch information
jbellis committed Nov 25, 2024
1 parent 208f88f commit a5060e9
Show file tree
Hide file tree
Showing 11 changed files with 196 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,21 @@ abstract class ColumnFilterFactory
*/
abstract ColumnFilter newInstance(List<Selector> selectors);

public static ColumnFilterFactory wildcard(TableMetadata table)
public static ColumnFilterFactory wildcard(TableMetadata table, Set<ColumnMetadata> orderingColumns)
{
return new PrecomputedColumnFilter(ColumnFilter.all(table));
ColumnFilter cf;
if (orderingColumns.isEmpty())
{
cf = ColumnFilter.all(table);
}
else
{
ColumnFilter.Builder builder = ColumnFilter.selectionBuilder();
builder.addAll(table.regularAndStaticColumns());
builder.addAll(orderingColumns);
cf = builder.build();
}
return new PrecomputedColumnFilter(cf);
}

public static ColumnFilterFactory fromColumns(TableMetadata table,
Expand Down
23 changes: 21 additions & 2 deletions src/java/org/apache/cassandra/cql3/selection/Selection.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,24 @@ public abstract class Selection
private static final Predicate<ColumnMetadata> STATIC_COLUMN_FILTER = (column) -> column.isStatic();

private final TableMetadata table;

// Full list of columns needed for processing the query, including selected columns, ordering columns,
// and columns needed for restrictions. Wildcard columns are fully materialized here.
//
// This also includes synthetic columns, because unlike all the other not-physical-columns selectables, they are
// computed on the replica instead of the coordinator and so, like physical columns, they need to be sent back
// as part of the result.
private final List<ColumnMetadata> columns;

// maps ColumnSpecifications (columns, function calls, aliases) to the columns backing them
private final SelectionColumnMapping columnMapping;

// metadata matching the ColumnSpcifications
protected final ResultSet.ResultMetadata metadata;

// creates a ColumnFilter that breaks columns into `queried` and `fetched`
protected final ColumnFilterFactory columnFilterFactory;

protected final boolean isJson;

// Columns used to order the result set for JSON queries with post ordering.
Expand Down Expand Up @@ -126,10 +140,15 @@ public ResultSet.ResultMetadata getResultMetadata()
}

public static Selection wildcard(TableMetadata table, boolean isJson, boolean returnStaticContentOnPartitionWithNoRows)
{
return wildcard(table, Collections.emptySet(), isJson, returnStaticContentOnPartitionWithNoRows);
}

public static Selection wildcard(TableMetadata table, Set<ColumnMetadata> orderingColumns, boolean isJson, boolean returnStaticContentOnPartitionWithNoRows)
{
List<ColumnMetadata> all = new ArrayList<>(table.columns().size());
Iterators.addAll(all, table.allColumnsInSelectOrder());
return new SimpleSelection(table, all, Collections.emptySet(), true, isJson, returnStaticContentOnPartitionWithNoRows);
return new SimpleSelection(table, all, orderingColumns, true, isJson, returnStaticContentOnPartitionWithNoRows);
}

public static Selection wildcardWithGroupBy(TableMetadata table,
Expand Down Expand Up @@ -400,7 +419,7 @@ public SimpleSelection(TableMetadata table,
selectedColumns,
orderingColumns,
SelectionColumnMapping.simpleMapping(selectedColumns),
isWildcard ? ColumnFilterFactory.wildcard(table)
isWildcard ? ColumnFilterFactory.wildcard(table, orderingColumns)
: ColumnFilterFactory.fromColumns(table, selectedColumns, orderingColumns, Collections.emptySet(), returnStaticContentOnPartitionWithNoRows),
isWildcard,
isJson);
Expand Down
18 changes: 8 additions & 10 deletions src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
Original file line number Diff line number Diff line change
Expand Up @@ -1080,12 +1080,16 @@ void processPartition(RowIterator partition, QueryOptions options, ResultSetBuil
case CLUSTERING:
result.add(row.clustering().bufferAt(def.position()));
break;
case SYNTHETIC:
// treat as REGULAR
case REGULAR:
result.add(row.getColumnData(def), nowInSec);
break;
case STATIC:
result.add(staticRow.getColumnData(def), nowInSec);
break;
default:
throw new AssertionError();
}
}
}
Expand Down Expand Up @@ -1159,7 +1163,7 @@ public SelectStatement prepare(boolean forView, UnaryOperator<String> keyspaceMa

// Besides actual restrictions (where clauses), prepareRestrictions will include pseudo-restrictions
// on indexed columns to allow pushing ORDER BY into the index; see StatementRestrictions::addOrderingRestrictions.
// Therefore, we don't want to convert the Ordering column into a +score column until after that.
// Therefore, we don't want to convert an ANN Ordering column into a +score column until after that.
List<Ordering> orderings = getOrderings(table);
StatementRestrictions restrictions = prepareRestrictions(
table, bindVariables, orderings, containsOnlyStaticColumns, forView);
Expand Down Expand Up @@ -1235,14 +1239,8 @@ private Map<ColumnMetadata, Ordering> getScoreOrdering(List<Ordering> orderings)
return null;

// Create synthetic score column
// Use the original column's table metadata but create new identifier and type
ColumnMetadata sourceColumn = expr.getColumn();
var cm = new ColumnMetadata(sourceColumn.ksName,
sourceColumn.cfName,
new ColumnIdentifier("+score", true),
FloatType.instance,
ColumnMetadata.NO_POSITION,
ColumnMetadata.Kind.REGULAR);
var cm = ColumnMetadata.syntheticColumn(sourceColumn.ksName, sourceColumn.cfName, ColumnMetadata.SYNTHETIC_SCORE_ID, FloatType.instance);
return Map.of(cm, orderings.get(0));
}

Expand All @@ -1264,7 +1262,7 @@ private Selection prepareSelection(TableMetadata table,
if (selectables.isEmpty()) // wildcard query
{
return hasGroupBy ? Selection.wildcardWithGroupBy(table, boundNames, parameters.isJson, restrictions.returnStaticContentOnPartitionWithNoRows())
: Selection.wildcard(table, parameters.isJson, restrictions.returnStaticContentOnPartitionWithNoRows());
: Selection.wildcard(table, resultSetOrderingColumns, parameters.isJson, restrictions.returnStaticContentOnPartitionWithNoRows());
}

return Selection.fromSelectors(table,
Expand Down Expand Up @@ -1502,7 +1500,7 @@ private boolean isReversed(TableMetadata table, Map<ColumnMetadata, Ordering> or
{
ColumnMetadata def = entry.getKey();
Ordering ordering = entry.getValue();
// We defined ANN OF to be ASC ordering, as in, "order by near-ness". But since score goves from
// We defined ANN OF to be ASC ordering, as in, "order by near-ness". But since score goes from
// 0 (worst) to 1 (closest), we need to reverse the ordering for the comparator when we're sorting
// by synthetic +score column.
boolean cqlReversed = ordering.direction == Ordering.Direction.DESC;
Expand Down
99 changes: 88 additions & 11 deletions src/java/org/apache/cassandra/db/Columns.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import net.nicoulaj.compilecommand.annotations.DontInline;
import org.apache.cassandra.cql3.ColumnIdentifier;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.SetType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.db.rows.ColumnData;
Expand All @@ -36,6 +37,7 @@
import org.apache.cassandra.io.util.DataOutputPlus;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.serializers.AbstractTypeSerializer;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.ObjectSizes;
import org.apache.cassandra.utils.SearchIterator;
Expand Down Expand Up @@ -334,6 +336,11 @@ public Iterator<ColumnMetadata> simpleColumns()
return BTree.iterator(columns, 0, complexIdx - 1, BTree.Dir.ASC);
}

public Iterator<ColumnMetadata> simpleColumnsDesc()
{
return BTree.iterator(columns, 0, complexIdx - 1, BTree.Dir.DESC);
}

/**
* Iterator over the complex columns of this object.
*
Expand Down Expand Up @@ -459,42 +466,112 @@ public String toString()

public static class Serializer
{
AbstractTypeSerializer typeSerializer = new AbstractTypeSerializer();

public void serialize(Columns columns, DataOutputPlus out) throws IOException
{
out.writeUnsignedVInt(columns.size());
int regularCount = 0;
int syntheticCount = 0;

// Count regular and synthetic columns
for (ColumnMetadata column : columns)
{
if (column.isSynthetic())
syntheticCount++;
else
regularCount++;
}

// Jam the two counts into a single value to avoid massive backwards compatibility issues
long packedCount = getPackedCount(syntheticCount, regularCount);
out.writeUnsignedVInt(packedCount);

// First pass - write regular columns
for (ColumnMetadata column : columns)
ByteBufferUtil.writeWithVIntLength(column.name.bytes, out);
{
if (!column.isSynthetic())
ByteBufferUtil.writeWithVIntLength(column.name.bytes, out);
}

// Second pass - write synthetic columns with their full metadata
for (ColumnMetadata column : columns)
{
if (column.isSynthetic())
{
ByteBufferUtil.writeWithVIntLength(column.name.bytes, out);
typeSerializer.serialize(column.type, out);
}
}
}

private static long getPackedCount(int syntheticCount, int regularCount)
{
// Left shift of 20 gives us over 1M regular columns, and up to 4 synthetic columns
// before overflowing to a 4th byte.
return ((long) syntheticCount << 20) | regularCount;
}

public long serializedSize(Columns columns)
{
long size = TypeSizes.sizeofUnsignedVInt(columns.size());
int regularCount = 0;
int syntheticCount = 0;
long size = 0;

// Count and calculate sizes
for (ColumnMetadata column : columns)
size += ByteBufferUtil.serializedSizeWithVIntLength(column.name.bytes);
return size;
{
if (column.isSynthetic())
{
syntheticCount++;
size += ByteBufferUtil.serializedSizeWithVIntLength(column.name.bytes);
size += typeSerializer.serializedSize(column.type);
}
else
{
regularCount++;
size += ByteBufferUtil.serializedSizeWithVIntLength(column.name.bytes);
}
}

return TypeSizes.sizeofUnsignedVInt(getPackedCount(syntheticCount, regularCount)) +
size;
}

public Columns deserialize(DataInputPlus in, TableMetadata metadata) throws IOException
{
int length = (int)in.readUnsignedVInt();
try (BTree.FastBuilder<ColumnMetadata> builder = BTree.fastBuilder())
{
for (int i = 0; i < length; i++)
long packedCount = in.readUnsignedVInt() ;
int regularCount = (int) (packedCount & 0xFFFFF);
int syntheticCount = (int) (packedCount >> 20);

// First pass - regular columns
for (int i = 0; i < regularCount; i++)
{
ByteBuffer name = ByteBufferUtil.readWithVIntLength(in);
ColumnMetadata column = metadata.getColumn(name);
if (column == null)
{
// If we don't find the definition, it could be we have data for a dropped column, and we shouldn't
// fail deserialization because of that. So we grab a "fake" ColumnMetadata that ensure proper
// deserialization. The column will be ignore later on anyway.
// If we don't find the definition, it could be we have data for a dropped column
column = metadata.getDroppedColumn(name);

if (column == null)
throw new RuntimeException("Unknown column " + UTF8Type.instance.getString(name) + " during deserialization");
}
builder.add(column);
}

// Second pass - synthetic columns
for (int i = 0; i < syntheticCount; i++)
{
ByteBuffer name = ByteBufferUtil.readWithVIntLength(in);
AbstractType<?> type = typeSerializer.deserialize(in);

if (!name.equals(ColumnMetadata.SYNTHETIC_SCORE_ID.bytes))
throw new IllegalStateException("Unknown synthetic column " + UTF8Type.instance.getString(name));

ColumnMetadata column = ColumnMetadata.syntheticColumn(metadata.keyspace, metadata.name, ColumnMetadata.SYNTHETIC_SCORE_ID, type);
builder.add(column);
}
return new Columns(builder.build());
}
}
Expand Down
20 changes: 17 additions & 3 deletions src/java/org/apache/cassandra/db/ReadCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
import org.apache.cassandra.net.Message;
import org.apache.cassandra.net.MessageFlag;
import org.apache.cassandra.net.Verb;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.IndexMetadata;
import org.apache.cassandra.schema.Schema;
import org.apache.cassandra.schema.SchemaConstants;
Expand Down Expand Up @@ -413,9 +414,9 @@ public UnfilteredPartitionIterator executeLocally(ReadExecutionController execut
}

Context context = Context.from(this);
UnfilteredPartitionIterator iterator = (null == searcher) ? Transformation.apply(queryStorage(cfs, executionController), new TrackingRowIterator(context))
: Transformation.apply(searchStorage(searcher, executionController), new TrackingRowIterator(context));

var storageTarget = (null == searcher) ? queryStorage(cfs, executionController)
: searchStorage(searcher, executionController);
UnfilteredPartitionIterator iterator = Transformation.apply(storageTarget, new TrackingRowIterator(context));
iterator = RTBoundValidator.validate(iterator, Stage.MERGED, false);

try
Expand Down Expand Up @@ -1047,6 +1048,19 @@ public ReadCommand deserialize(DataInputPlus in, int version) throws IOException
TableMetadata metadata = schema.getExistingTableMetadata(TableId.deserialize(in));
int nowInSec = in.readInt();
ColumnFilter columnFilter = ColumnFilter.serializer.deserialize(in, version, metadata);

// add synthetic columns to the tablemetadata so we can serialize them in our response
var tmb = metadata.unbuild();
for (var it = columnFilter.fetchedColumns().regulars.simpleColumnsDesc(); it.hasNext(); )
{
var c = it.next();
// synthetic columns sort last, so when we hit the first non-synthetic, we're done
if (!c.isSynthetic())
break;
tmb.addColumn(ColumnMetadata.syntheticColumn(c.ksName, c.cfName, c.name, c.type));
}
metadata = tmb.build();

RowFilter rowFilter = RowFilter.serializer.deserialize(in, version, metadata);
DataLimits limits = DataLimits.serializer.deserialize(in, version, metadata.comparator);
Index.QueryPlan indexQueryPlan = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public Builder add(ColumnMetadata c)
}
else
{
assert c.isRegular();
assert c.isRegular() || c.isSynthetic();
if (regularColumns == null)
regularColumns = BTree.builder(naturalOrder());
regularColumns.add(c);
Expand Down
Loading

0 comments on commit a5060e9

Please sign in to comment.