Skip to content

Commit

Permalink
Fixed a bug that does not get a space type from index setting when it…
Browse files Browse the repository at this point in the history
… is empty for compatibility.

Signed-off-by: Dooyong Kim <[email protected]>
  • Loading branch information
Dooyong Kim committed Jan 9, 2025
1 parent 405e5e2 commit e9b42c0
Show file tree
Hide file tree
Showing 11 changed files with 392 additions and 83 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Release query vector memory after execution (#2346)[https://github.com/opensearch-project/k-NN/pull/2346]
* Fix shard level rescoring disabled setting flag (#2352)[https://github.com/opensearch-project/k-NN/pull/2352]
* Fix filter rewrite logic which was resulting in getting inconsistent / incorrect results for cases where filter was getting rewritten for shards (#2359)[https://github.com/opensearch-project/k-NN/pull/2359]
* Fixing it to retrieve space_type from index setting when both method and top level don't have the value. [#2309](https://github.com/opensearch-project/k-NN/pull/2309)
### Infrastructure
* Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259)
* Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,14 @@ public void testKNNIndexBinaryForceMerge() throws Exception {
}

// Custom Legacy Field Mapping
// space_type : "linf", engine : "nmslib", m : 2, ef_construction : 2
// space_type : "innerproduct", engine : "nmslib", m : 2, ef_construction : 2
public void testKNNIndexCustomLegacyFieldMapping() throws Exception {

// When the cluster is in old version, create a KNN index with custom legacy field mapping settings
// and add documents into that index
if (isRunningAgainstOldCluster()) {
Settings.Builder indexMappingSettings = createKNNIndexCustomLegacyFieldMappingIndexSettingsBuilder(
SpaceType.LINF,
SpaceType.INNER_PRODUCT,
KNN_ALGO_PARAM_M_MIN_VALUE,
KNN_ALGO_PARAM_EF_CONSTRUCTION_MIN_VALUE
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,6 @@ public void testKNNL1ScriptScore() throws Exception {
}
}

// KNN script scoring for space_type "linf"
public void testKNNLinfScriptScore() throws Exception {
if (isRunningAgainstOldCluster()) {
createKnnIndex(testIndex, createKNNDefaultScriptScoreSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
QUERY_COUNT = NUM_DOCS;
DOC_ID = NUM_DOCS;
validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.LINF);
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
QUERY_COUNT = QUERY_COUNT + NUM_DOCS;
validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.LINF);
deleteKNNIndex(testIndex);
}
}

// KNN script scoring for space_type "innerproduct"
public void testKNNInnerProductScriptScore() throws Exception {
if (isRunningAgainstOldCluster()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@ public void testKNNWarmupDefaultLegacyFieldMapping() throws Exception {
}

// Custom Legacy Field Mapping
// space_type : "linf", engine : "nmslib", m : 2, ef_construction : 2
// space_type : "innerproduct", engine : "nmslib", m : 2, ef_construction : 2
public void testKNNWarmupCustomLegacyFieldMapping() throws Exception {

// When the cluster is in old version, create a KNN index with custom legacy field mapping settings
// and add documents into that index
if (isRunningAgainstOldCluster()) {
Settings.Builder indexMappingSettings = createKNNIndexCustomLegacyFieldMappingIndexSettingsBuilder(
SpaceType.LINF,
SpaceType.INNER_PRODUCT,
KNN_ALGO_PARAM_M_MIN_VALUE,
KNN_ALGO_PARAM_EF_CONSTRUCTION_MIN_VALUE
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
package org.opensearch.knn.index.engine;

import org.apache.logging.log4j.util.Strings;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.mapper.MapperParsingException;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.VectorDataType;

Expand All @@ -25,24 +27,30 @@ public final class SpaceTypeResolver {
private SpaceTypeResolver() {}

/**
* Resolves space type from configuration details. It is guaranteed not to return either null or
* {@link SpaceType#UNDEFINED}
* Resolves space type from configuration details. It is guaranteed not to return null.
* When space is not in either method and top level, UNDEFINED will be returned.
* This can happen when it is defined at index level which is deprecated and no longer allowed in the future.
* In this case, UNDEFINED will be returned.
*
* @param knnMethodContext Method context
* @param vectorDataType Vectordatatype
* @param knnMethodContext Method context
* @param topLevelSpaceTypeString Alternative top-level space type
* @return {@link SpaceType} for the method
*/
public SpaceType resolveSpaceType(
final KNNMethodContext knnMethodContext,
final VectorDataType vectorDataType,
final String topLevelSpaceTypeString
final String topLevelSpaceTypeString,
final Settings indexSettings,
final VectorDataType vectorDataType
) {
SpaceType methodSpaceType = getSpaceTypeFromMethodContext(knnMethodContext);
SpaceType topLevelSpaceType = getSpaceTypeFromString(topLevelSpaceTypeString);

if (isSpaceTypeConfigured(methodSpaceType) == false && isSpaceTypeConfigured(topLevelSpaceType) == false) {
return getSpaceTypeFromVectorDataType(vectorDataType);
final String spaceType = indexSettings.get(KNNSettings.INDEX_KNN_SPACE_TYPE.getKey());
if (spaceType == null) {
return getDefaultSpaceType(vectorDataType);
}
return SpaceType.getSpace(spaceType);
}

if (isSpaceTypeConfigured(methodSpaceType) == false) {
Expand All @@ -67,6 +75,13 @@ public SpaceType resolveSpaceType(
);
}

public static SpaceType getDefaultSpaceType(final VectorDataType vectorDataType) {
if (vectorDataType == VectorDataType.BINARY) {
return SpaceType.DEFAULT_BINARY;
}
return SpaceType.DEFAULT;
}

private SpaceType getSpaceTypeFromMethodContext(final KNNMethodContext knnMethodContext) {
if (knnMethodContext == null) {
return SpaceType.UNDEFINED;
Expand All @@ -75,13 +90,6 @@ private SpaceType getSpaceTypeFromMethodContext(final KNNMethodContext knnMethod
return knnMethodContext.getSpaceType();
}

private SpaceType getSpaceTypeFromVectorDataType(final VectorDataType vectorDataType) {
if (vectorDataType == VectorDataType.BINARY) {
return SpaceType.DEFAULT_BINARY;
}
return SpaceType.DEFAULT;
}

private SpaceType getSpaceTypeFromString(final String spaceType) {
if (Strings.isEmpty(spaceType)) {
return SpaceType.UNDEFINED;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -390,13 +390,20 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
// If the original knnMethodContext is not null, resolve its space type and engine from the rest of the
// configuration. This is consistent with the existing behavior for space type in 2.16 where we modify the
// parsed value
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
final SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
builder.originalParameters.getKnnMethodContext(),
builder.vectorDataType.get(),
builder.topLevelSpaceType.get()
builder.topLevelSpaceType.get(),
parserContext.getSettings(),
builder.vectorDataType.get()
);

// Set space type to the original knnMethodContext. Since the resolved one can be UNDEFINED,
// we must wrap it and pick up the default when it is UNDEFINED.
setSpaceType(builder.originalParameters.getKnnMethodContext(), resolvedSpaceType);
validateSpaceType(builder);

// Resolve method component. For the legacy case where space type can be configured at index level,
// it first tries to use the given one then tries to get it from index setting when the space type is UNDEFINED.
resolveKNNMethodComponents(builder, parserContext, resolvedSpaceType);
validateFromKNNMethod(builder);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ static boolean useLuceneKNNVectorsFormat(final Version indexCreatedVersion) {
return indexCreatedVersion.onOrAfter(Version.V_2_17_0);
}

private static SpaceType getSpaceType(final Settings indexSettings) {
public static SpaceType getSpaceType(final Settings indexSettings) {
String spaceType = indexSettings.get(KNNSettings.INDEX_KNN_SPACE_TYPE.getKey());
if (spaceType == null) {
spaceType = KNNSettings.INDEX_KNN_DEFAULT_SPACE_TYPE;
Expand Down Expand Up @@ -196,15 +196,11 @@ private static int getEfConstruction(Settings indexSettings, Version indexVersio
static KNNMethodContext createKNNMethodContextFromLegacy(
Settings indexSettings,
Version indexCreatedVersion,
SpaceType topLevelSpaceType
SpaceType resolvedSpaceType
) {
// If top level spaceType is set then use that spaceType otherwise default to spaceType from index-settings
final SpaceType finalSpaceToSet = topLevelSpaceType != SpaceType.UNDEFINED
? topLevelSpaceType
: KNNVectorFieldMapperUtil.getSpaceType(indexSettings);
return new KNNMethodContext(
KNNEngine.NMSLIB,
finalSpaceToSet,
resolvedSpaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ && ensureSpaceTypeNotSet(topLevelSpaceType)) {
vectorDataType = VectorDataType.DEFAULT;
}

if ((knnMethodContext == null || knnMethodContext.getSpaceType() == SpaceType.UNDEFINED)
&& topLevelSpaceType == SpaceType.UNDEFINED) {
topLevelSpaceType = SpaceTypeResolver.getDefaultSpaceType(vectorDataType);
}

ensureIfSetThenEquals(
MODE_PARAMETER,
mode,
Expand All @@ -161,8 +166,9 @@ && ensureSpaceTypeNotSet(topLevelSpaceType)) {
);
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
knnMethodContext,
vectorDataType,
topLevelSpaceType.getValue()
topLevelSpaceType.getValue(),
null,
vectorDataType
);
setSpaceType(knnMethodContext, resolvedSpaceType);
TrainingModelRequest trainingModelRequest = new TrainingModelRequest(
Expand Down
72 changes: 69 additions & 3 deletions src/test/java/org/opensearch/knn/index/KNNESSettingsTestIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,91 @@

package org.opensearch.knn.index;

import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.plugin.stats.StatNames;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.opensearch.client.Response;
import org.opensearch.client.ResponseException;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.KNNResult;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.plugin.stats.StatNames;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.hamcrest.Matchers.containsString;
import static org.opensearch.knn.TestUtils.KNN_VECTOR;
import static org.opensearch.knn.TestUtils.PROPERTIES;
import static org.opensearch.knn.TestUtils.VECTOR_TYPE;
import static org.opensearch.knn.common.KNNConstants.DIMENSION;

public class KNNESSettingsTestIT extends KNNRestTestCase {

public static final int ALWAYS_BUILD_GRAPH = 0;

public void testKNNLegacySpaceTypeIndexingTest() throws IOException, ParseException {
// Configure space_type at index level. This is deprecated and will be removed in the future.
final Settings indexSettings = Settings.builder()
.put("index.knn", true)
.put("knn.algo_param.ef_search", 100)
.put("knn.space_type", SpaceType.INNER_PRODUCT.getValue())
.build();

// This mapping does not have method.
final String testField = "knn_field";
final String testIndex = "knn_index";
final String mapping = XContentFactory.jsonBuilder()
.startObject()
.startObject(PROPERTIES)
.startObject(testField)
.field(VECTOR_TYPE, KNN_VECTOR)
.field(DIMENSION, 2)
.endObject()
.endObject()
.endObject()
.toString();

createKnnIndex(testIndex, indexSettings, mapping);

// Ingest data.
float[] queryVector = new float[] { 2, 3 };
final int k = 2;

float[][] vectorData = new float[5][2];
vectorData[0] = new float[] { 11.7f, 2.7f }; // distance=31.5
vectorData[1] = new float[] { 20.9f, 3.9f }; // distance=53.5 <- answer
vectorData[2] = new float[] { 3.77f, 4.22f }; // distance=20.2
vectorData[3] = new float[] { 15, 6 }; // distance=48 <- answer
vectorData[4] = new float[] { 4.7f, 5.9f }; // distance=27.1

bulkAddKnnDocs(testIndex, testField, vectorData, vectorData.length);
flushIndex(testIndex);

// Send a query and verify scores are correct.
Response searchResponse = searchKNNIndex(
testIndex,
KNNQueryBuilder.builder().k(k).fieldName(testField).vector(queryVector).build(),
k
);

List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), testField);

assertEquals(k, results.size());
Set<String> docIds = new HashSet<>();
for (KNNResult result : results) {
docIds.add(result.getDocId());
}
assertEquals(new HashSet<>(Arrays.asList("1", "3")), docIds);
}

/**
* KNN Index writes should be blocked when the plugin disabled
* @throws Exception Exception from test
Expand Down
Loading

0 comments on commit e9b42c0

Please sign in to comment.