From 0df59c6ec8733b86f6c8537b1fabe37c21721242 Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Tue, 22 Oct 2024 12:21:26 -0700 Subject: [PATCH] Fixed logic for getting scorers for sub queries in HQ (#956) Signed-off-by: Martin Gaievski --- ...ch-neural-search.release-notes-2.18.0.0.md | 3 +- .../neuralsearch/query/HybridQueryScorer.java | 7 +- .../query/HybridQueryAggregationsIT.java | 198 +++++++++++++++++- src/test/resources/processor/ingest_bulk.json | 184 ++++++++++++++++ .../neuralsearch/BaseNeuralSearchIT.java | 59 ++++++ .../neuralsearch/util/TestUtils.java | 6 +- 6 files changed, 451 insertions(+), 6 deletions(-) create mode 100644 src/test/resources/processor/ingest_bulk.json diff --git a/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md b/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md index 2b6ef6a66..fc7f1528f 100644 --- a/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md +++ b/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md @@ -5,7 +5,8 @@ Compatible with OpenSearch 2.18.0 ### Features - Introduces ByFieldRerankProcessor for second level reranking on documents ([#932](https://github.com/opensearch-project/neural-search/pull/932)) - +### Bug Fixes +- Fixed incorrect document order for nested aggregations in hybrid query ([#956](https://github.com/opensearch-project/neural-search/pull/956)) ### Enhancements - Implement `ignore_missing` field in text chunking processors ([#907](https://github.com/opensearch-project/neural-search/pull/907)) - Added rescorer in hybrid query ([#917](https://github.com/opensearch-project/neural-search/pull/917)) diff --git a/src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java b/src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java index 23dbd0e1d..eb410aa23 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java +++ b/src/main/java/org/opensearch/neuralsearch/query/HybridQueryScorer.java @@ -97,8 +97,13 @@ public int advanceShallow(int target) throws IOException { */ @Override public float score() throws IOException { + return score(getSubMatches()); + } + + private float score(DisiWrapper topList) throws IOException { float totalScore = 0.0f; - for (DisiWrapper disiWrapper : subScorersPQ) { + for (DisiWrapper disiWrapper = topList; disiWrapper != null; disiWrapper = disiWrapper.next) { + // check if this doc has match in the subQuery. If not, add score as 0.0 and continue if (disiWrapper.scorer.docID() == DocIdSetIterator.NO_MORE_DOCS) { continue; } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java index 3bb566aef..cf0a753dc 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java @@ -12,8 +12,10 @@ import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.neuralsearch.BaseNeuralSearchIT; +import org.opensearch.script.Script; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.BucketOrder; import org.opensearch.search.aggregations.PipelineAggregatorBuilders; import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; import org.opensearch.search.aggregations.pipeline.AvgBucketPipelineAggregationBuilder; @@ -21,20 +23,25 @@ import org.opensearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; import org.opensearch.search.aggregations.pipeline.MinBucketPipelineAggregationBuilder; import org.opensearch.search.aggregations.pipeline.SumBucketPipelineAggregationBuilder; +import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; -import static org.opensearch.neuralsearch.util.TestUtils.DELTA_FOR_SCORE_ASSERTION; import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getAggregationBuckets; import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getAggregationValue; import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getAggregationValues; import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getAggregations; import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getNestedHits; import static org.opensearch.neuralsearch.util.TestUtils.assertHitResultsFromQuery; +import static org.opensearch.neuralsearch.util.TestUtils.TEST_SPACE_TYPE; +import static org.opensearch.neuralsearch.util.TestUtils.DELTA_FOR_SCORE_ASSERTION; /** * Integration tests for base scenarios when aggregations are combined with hybrid query @@ -42,6 +49,7 @@ public class HybridQueryAggregationsIT extends BaseNeuralSearchIT { private static final String TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_MULTIPLE_SHARDS = "test-hybrid-aggs-multi-doc-index-multiple-shards"; private static final String TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_SINGLE_SHARD = "test-hybrid-aggs-multi-doc-index-single-shard"; + private static final String TEST_MULTI_DOC_INDEX_FOR_NESTED_AGGS_MULTIPLE_SHARDS = "test-hybrid-nested-aggs-multi-doc-index"; private static final String TEST_QUERY_TEXT3 = "hello"; private static final String TEST_QUERY_TEXT4 = "everyone"; private static final String TEST_QUERY_TEXT5 = "welcome"; @@ -86,6 +94,12 @@ public class HybridQueryAggregationsIT extends BaseNeuralSearchIT { private static final String BUCKETS_AGGREGATION_NAME_2 = "date_buckets_2"; private static final String BUCKETS_AGGREGATION_NAME_3 = "date_buckets_3"; private static final String BUCKETS_AGGREGATION_NAME_4 = "date_buckets_4"; + protected static final String FLOAT_FIELD_NAME_IMDB = "imdb"; + protected static final String KEYWORD_FIELD_NAME_ACTOR = "actor"; + protected static final String CARDINALITY_OF_UNIQUE_NAMES = "cardinality_of_unique_names"; + protected static final String UNIQUE_NAMES = "unique_names"; + protected static final String AGGREGATION_NAME_MAX_SCORE = "max_score"; + protected static final String AGGREGATION_NAME_TOP_DOC = "top_doc"; @Before public void setUp() throws Exception { @@ -464,6 +478,186 @@ public void testPostFilterOnIndexWithSingleShards_WhenConcurrentSearchEnabled_th testPostFilterWithComplexHybridQuery(true, true); } + @SneakyThrows + public void testNestedAggs_whenMultipleShardsAndConcurrentSearchDisabled_thenSuccessful() { + updateClusterSettings(CONCURRENT_SEGMENT_SEARCH_ENABLED, false); + try { + prepareResourcesForNestegAggregationsScenario(TEST_MULTI_DOC_INDEX_FOR_NESTED_AGGS_MULTIPLE_SHARDS); + assertNestedAggregations(TEST_MULTI_DOC_INDEX_FOR_NESTED_AGGS_MULTIPLE_SHARDS); + } finally { + wipeOfTestResources(TEST_MULTI_DOC_INDEX_FOR_NESTED_AGGS_MULTIPLE_SHARDS, null, null, SEARCH_PIPELINE); + } + } + + @SneakyThrows + public void testNestedAggs_whenMultipleShardsAndConcurrentSearchEnabled_thenSuccessful() { + updateClusterSettings(CONCURRENT_SEGMENT_SEARCH_ENABLED, true); + try { + prepareResourcesForNestegAggregationsScenario(TEST_MULTI_DOC_INDEX_FOR_NESTED_AGGS_MULTIPLE_SHARDS); + assertNestedAggregations(TEST_MULTI_DOC_INDEX_FOR_NESTED_AGGS_MULTIPLE_SHARDS); + } finally { + wipeOfTestResources(TEST_MULTI_DOC_INDEX_FOR_NESTED_AGGS_MULTIPLE_SHARDS, null, null, SEARCH_PIPELINE); + } + } + + private void prepareResourcesForNestegAggregationsScenario(String index) throws Exception { + if (!indexExists(index)) { + createIndexWithConfiguration( + index, + buildIndexConfiguration( + List.of(new KNNFieldConfig("location", 2, TEST_SPACE_TYPE)), + List.of(), + List.of(), + List.of(FLOAT_FIELD_NAME_IMDB), + List.of(KEYWORD_FIELD_NAME_ACTOR), + List.of(), + 3 + ), + "" + ); + + String ingestBulkPayload = Files.readString(Path.of(classLoader.getResource("processor/ingest_bulk.json").toURI())) + .replace("\"{indexname}\"", "\"" + index + "\""); + + bulkIngest(ingestBulkPayload, null); + } + createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE); + } + + private void assertNestedAggregations(String index) { + /* constructing following search query + { + "from": 0, + "aggs": { + "cardinality_of_unique_names": { + "cardinality": { + "field": "actor" + } + }, + "unique_names": { + "terms": { + "field": "actor", + "size": 10, + "order": { + "max_score": "desc" + } + }, + "aggs": { + "top_doc": { + "top_hits": { + "size": 1, + "sort": [ + { + "_score": { + "order": "desc" + } + } + ] + } + }, + "max_score": { + "max": { + "script": { + "source": "_score" + } + } + } + } + } + }, + "query": { + "hybrid": { + "queries": [ + { + "match": { + "actor": "anil" + } + }, + { + "range": { + "imdb": { + "gte": 1.0, + "lte": 10.0 + } + } + } + ]}}} + */ + + QueryBuilder rangeFilterQuery = QueryBuilders.rangeQuery(FLOAT_FIELD_NAME_IMDB).gte(1.0).lte(10.0); + QueryBuilder matchQuery = QueryBuilders.matchQuery(KEYWORD_FIELD_NAME_ACTOR, "anil"); + HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder(); + hybridQueryBuilder.add(matchQuery).add(rangeFilterQuery); + + AggregationBuilder aggsBuilderCardinality = AggregationBuilders.cardinality(CARDINALITY_OF_UNIQUE_NAMES) + .field(KEYWORD_FIELD_NAME_ACTOR); + AggregationBuilder aggsBuilderUniqueNames = AggregationBuilders.terms(UNIQUE_NAMES) + .field(KEYWORD_FIELD_NAME_ACTOR) + .size(10) + .order(BucketOrder.aggregation(AGGREGATION_NAME_MAX_SCORE, false)) + .subAggregation( + AggregationBuilders.topHits(AGGREGATION_NAME_TOP_DOC).size(1).sort(SortBuilders.scoreSort().order(SortOrder.DESC)) + ) + .subAggregation(AggregationBuilders.max(AGGREGATION_NAME_MAX_SCORE).script(new Script("_score"))); + + Map searchResponseAsMap = search( + index, + hybridQueryBuilder, + null, + 10, + Map.of("search_pipeline", SEARCH_PIPELINE), + List.of(aggsBuilderCardinality, aggsBuilderUniqueNames), + rangeFilterQuery, + null, + false, + null, + 0 + ); + assertNotNull(searchResponseAsMap); + + // assert actual results + // aggregations + Map aggregations = getAggregations(searchResponseAsMap); + assertNotNull(aggregations); + + int cardinalityValue = getAggregationValue(aggregations, CARDINALITY_OF_UNIQUE_NAMES); + assertEquals(7, cardinalityValue); + + Map uniqueAggValue = getAggregationValues(aggregations, UNIQUE_NAMES); + assertEquals(3, uniqueAggValue.size()); + assertEquals(0, uniqueAggValue.get("doc_count_error_upper_bound")); + assertEquals(0, uniqueAggValue.get("sum_other_doc_count")); + + List> buckets = getAggregationBuckets(aggregations, UNIQUE_NAMES); + assertNotNull(buckets); + assertEquals(7, buckets.size()); + + // check content of few buckets + Map firstBucket = buckets.get(0); + assertEquals(4, firstBucket.size()); + assertEquals("anil", firstBucket.get(KEY)); + assertEquals(42, firstBucket.get(BUCKET_AGG_DOC_COUNT_FIELD)); + assertNotNull(getAggregationValue(firstBucket, AGGREGATION_NAME_MAX_SCORE)); + assertTrue((double) getAggregationValue(firstBucket, AGGREGATION_NAME_MAX_SCORE) > 1.0f); + + Map secondBucket = buckets.get(1); + assertEquals(4, secondBucket.size()); + assertEquals("abhishek", secondBucket.get(KEY)); + assertEquals(8, secondBucket.get(BUCKET_AGG_DOC_COUNT_FIELD)); + assertNotNull(getAggregationValue(secondBucket, AGGREGATION_NAME_MAX_SCORE)); + assertEquals(1.0, getAggregationValue(secondBucket, AGGREGATION_NAME_MAX_SCORE), DELTA_FOR_SCORE_ASSERTION); + + Map lastBucket = buckets.get(buckets.size() - 1); + assertEquals(4, lastBucket.size()); + assertEquals("sanjay", lastBucket.get(KEY)); + assertEquals(7, lastBucket.get(BUCKET_AGG_DOC_COUNT_FIELD)); + assertNotNull(getAggregationValue(lastBucket, AGGREGATION_NAME_MAX_SCORE)); + assertEquals(1.0, getAggregationValue(lastBucket, AGGREGATION_NAME_MAX_SCORE), DELTA_FOR_SCORE_ASSERTION); + + // assert the hybrid query scores + assertHitResultsFromQuery(10, 92, searchResponseAsMap); + } + private void testMaxAggsOnSingleShardCluster() throws Exception { try { prepareResourcesForSingleShardIndex(TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_SINGLE_SHARD, SEARCH_PIPELINE); @@ -501,8 +695,6 @@ private void testDateRange() throws IOException { try { initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_MULTIPLE_SHARDS); createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE); - // try { - // prepareResources(TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_MULTIPLE_SHARDS, SEARCH_PIPELINE); AggregationBuilder aggsBuilder = AggregationBuilders.dateRange(DATE_AGGREGATION_NAME) .field(DATE_FIELD_1) diff --git a/src/test/resources/processor/ingest_bulk.json b/src/test/resources/processor/ingest_bulk.json new file mode 100644 index 000000000..6156c3560 --- /dev/null +++ b/src/test/resources/processor/ingest_bulk.json @@ -0,0 +1,184 @@ +{"index": {"_index": "{indexname}"}} +{"id": "s9", "passage_text": "Stenocarpus mynpachtbrief", "imdb": 7.7, "actor": "jackie"} +{"index": {"_index": "{indexname}"}} +{"id": "s10", "passage_text": "weirangle meritorious", "imdb": 2.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s11", "passage_text": "clotbur plagiostome", "imdb": 2.5, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s12", "passage_text": "guider perityphlic", "imdb": 8.0, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s13", "passage_text": "amidoazobenzene bibliopegy nymphosis", "imdb": 2.5, "actor": "sanjay"} +{"index": {"_index": "{indexname}"}} +{"id": "s14", "passage_text": "sepiarian antipode unpadded", "imdb": 9.8, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s15", "passage_text": "portraiture hemihyperidrosis mongery", "imdb": 8.0, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s16", "passage_text": "pseudoasymmetrical glucolipide nonangling", "imdb": 6.6, "actor": "abhishek"} +{"index": {"_index": "{indexname}"}} +{"id": "s17", "passage_text": "supraseptal snitch", "imdb": 9.9, "actor": "ranveer"} +{"index": {"_index": "{indexname}"}} +{"id": "s18", "passage_text": "ultragaseous factious", "imdb": 9.9, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s19", "passage_text": "shrewstruck redemptor uninquisitive", "imdb": 9.8, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s20", "passage_text": "superindifferent wet chontawood", "imdb": 9.9, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s21", "passage_text": "corporational harebrain", "imdb": 9.8, "actor": "ranveer"} +{"index": {"_index": "{indexname}"}} +{"id": "s22", "passage_text": "aecial proscriptiveness pantometrical", "imdb": 9.8, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s23", "passage_text": "goodlike derived sorriness", "imdb": 6.6, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s24", "passage_text": "precognizant Albertina", "imdb": 9.9, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s25", "passage_text": "defoliated nominatively", "imdb": 9.8, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s26", "passage_text": "orthographist pseudoevangelical strideways", "imdb": 2.5, "actor": "jackie"} +{"index": {"_index": "{indexname}"}} +{"id": "s27", "passage_text": "delayingly outbleat", "imdb": 8.0, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s28", "passage_text": "precongratulation phytovitellin", "imdb": 3.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s29", "passage_text": "turtledom illogic interchanger", "imdb": 7.7, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s30", "passage_text": "penetratingly Pyrrhonean pioneership", "imdb": 4.5, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s31", "passage_text": "uneuphemistical acclimatize unpinked", "imdb": 10.0, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s32", "passage_text": "supercentrifuge sheetflood fairyhood", "imdb": 9.9, "actor": "jackie"} +{"index": {"_index": "{indexname}"}} +{"id": "s33", "passage_text": "uninheritable weetbird", "imdb": 9.8, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s34", "passage_text": "anelectrotonic Lewanna arsonium", "imdb": 6.6, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s35", "passage_text": "killeen laggen", "imdb": 9.9, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s36", "passage_text": "cyberneticist diffusate", "imdb": 2.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s37", "passage_text": "neuronal adephagia", "imdb": 9.8, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s38", "passage_text": "target crumby pipped", "imdb": 2.5, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s39", "passage_text": "Armoracia Dedanim walking", "imdb": 2.5, "actor": "jackie"} +{"index": {"_index": "{indexname}"}} +{"id": "s40", "passage_text": "incrash untrainable", "imdb": 6.6, "actor": "ranveer"} +{"index": {"_index": "{indexname}"}} +{"id": "s41", "passage_text": "scientician sweetfish trithiocarbonic", "imdb": 10.0, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s42", "passage_text": "gnarly studentlike notchwing", "imdb": 2.5, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s43", "passage_text": "Juha ambassage", "imdb": 9.8, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s44", "passage_text": "Skodaic shinbone", "imdb": 7.7, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s45", "passage_text": "parison Meibomia naturopathic", "imdb": 9.9, "actor": "sanjay"} +{"index": {"_index": "{indexname}"}} +{"id": "s46", "passage_text": "rheologist heartener", "imdb": 2.5, "actor": "ranveer"} +{"index": {"_index": "{indexname}"}} +{"id": "s47", "passage_text": "ungloved telford enfasten", "imdb": 8.0, "actor": "abhishek"} +{"index": {"_index": "{indexname}"}} +{"id": "s48", "passage_text": "averruncation blotchy schreinerize", "imdb": 8.0, "actor": "abhishek"} +{"index": {"_index": "{indexname}"}} +{"id": "s49", "passage_text": "graftdom joug unreared", "imdb": 6.6, "actor": "sanjay"} +{"index": {"_index": "{indexname}"}} +{"id": "s50", "passage_text": "forfeit quercite Typhoean", "imdb": 10.0, "actor": "abhishek"} +{"index": {"_index": "{indexname}"}} +{"id": "s51", "passage_text": "dimanganous sipunculoid", "imdb": 2.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s52", "passage_text": "subtotem aogiri", "imdb": 9.8, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s53", "passage_text": "stull outfast", "imdb": 7.7, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s54", "passage_text": "peership Marcionist", "imdb": 3.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s55", "passage_text": "blackish geissospermin phylarchical", "imdb": 2.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s56", "passage_text": "soapfish skinning stree", "imdb": 7.7, "actor": "sanjay"} +{"index": {"_index": "{indexname}"}} +{"id": "s57", "passage_text": "agamogony aeonist protractive", "imdb": 9.9, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s58", "passage_text": "descriptory labral", "imdb": 4.5, "actor": "abhishek"} +{"index": {"_index": "{indexname}"}} +{"id": "s59", "passage_text": "coincline phagedenic", "imdb": 2.5, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s60", "passage_text": "perform meadowsweet outlighten", "imdb": 9.8, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s61", "passage_text": "sublevel subcommended", "imdb": 8.0, "actor": "jackie"} +{"index": {"_index": "{indexname}"}} +{"id": "s62", "passage_text": "sundra hernanesell interspersal", "imdb": 7.7, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s63", "passage_text": "Buphthalmum Pitcairnia", "imdb": 6.6, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s64", "passage_text": "rheumatismal sporoblast", "imdb": 9.9, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s65", "passage_text": "spahi arteriectopia suburbican", "imdb": 2.5, "actor": "abhishek"} +{"index": {"_index": "{indexname}"}} +{"id": "s66", "passage_text": "municipalization unsympathy", "imdb": 8.0, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s67", "passage_text": "pyromucyl breislakite", "imdb": 2.5, "actor": "sanjay"} +{"index": {"_index": "{indexname}"}} +{"id": "s68", "passage_text": "grandfer kimberlite coattail", "imdb": 10.0, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s69", "passage_text": "Almohades unchancellor cubitopalmar", "imdb": 2.5, "actor": "ranveer"} +{"index": {"_index": "{indexname}"}} +{"id": "s70", "passage_text": "pierine outswift", "imdb": 9.8, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s71", "passage_text": "precompiler zirconic", "imdb": 3.0, "actor": "jackie"} +{"index": {"_index": "{indexname}"}} +{"id": "s72", "passage_text": "idiotical endomesoderm", "imdb": 4.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s73", "passage_text": "unnonsensical chamois vanadous", "imdb": 3.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s74", "passage_text": "Myrmidon semiflashproof", "imdb": 8.0, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s75", "passage_text": "unapprehendable archheretic", "imdb": 6.6, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s76", "passage_text": "Christmasberry preactive", "imdb": 9.9, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s77", "passage_text": "naphthol Melanthium", "imdb": 7.7, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s78", "passage_text": "unifarious spodomantic birdberry", "imdb": 2.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s79", "passage_text": "saltativeness sammer", "imdb": 2.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s80", "passage_text": "nova hypophyseoprivous", "imdb": 7.7, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s81", "passage_text": "hysterogen warmheartedly preflagellate", "imdb": 10.0, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s82", "passage_text": "thruster Johannist", "imdb": 9.8, "actor": "jackie"} +{"index": {"_index": "{indexname}"}} +{"id": "s83", "passage_text": "penetrably borsholder", "imdb": 6.6, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s84", "passage_text": "Anomalurus nihility forevermore", "imdb": 9.9, "actor": "abhishek"} +{"index": {"_index": "{indexname}"}} +{"id": "s85", "passage_text": "nonsystematic nonimmigrant nonburnable", "imdb": 2.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s86", "passage_text": "valeraldehyde interpermeate", "imdb": 8.0, "actor": "sanjay"} +{"index": {"_index": "{indexname}"}} +{"id": "s87", "passage_text": "noncretaceous Archegosaurus umbelliflorous", "imdb": 9.9, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s88", "passage_text": "chrysochlorous hobbledehoyism pycnite", "imdb": 7.7, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s89", "passage_text": "Puritanize egad unlanguaged", "imdb": 9.8, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s90", "passage_text": "tumasha ingulfment fensive", "imdb": 10.0, "actor": "jackie"} +{"index": {"_index": "{indexname}"}} +{"id": "s91", "passage_text": "headmost nonability noreaster", "imdb": 4.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s92", "passage_text": "spikily sketchy", "imdb": 3.0, "actor": "ranveer"} +{"index": {"_index": "{indexname}"}} +{"id": "s93", "passage_text": "bespecked pushmobile Melanconiales", "imdb": 3.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s94", "passage_text": "Tashnakist equimolar convincingness", "imdb": 9.8, "actor": "salman"} +{"index": {"_index": "{indexname}"}} +{"id": "s95", "passage_text": "mountainy gausterer", "imdb": 4.5, "actor": "anil"} +{"index": {"_index": "{indexname}"}} +{"id": "s96", "passage_text": "dorsoepitrochlear multimarble spheniscomorphic", "imdb": 8.0, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s97", "passage_text": "Fourierism frenum", "imdb": 6.6, "actor": "abhishek"} +{"index": {"_index": "{indexname}"}} +{"id": "s98", "passage_text": "meroblastic gymnoceratous eventognathous", "imdb": 7.7, "actor": "ranbir"} +{"index": {"_index": "{indexname}"}} +{"id": "s99", "passage_text": "purree operatee segregable", "imdb": 8.0, "actor": "sanjay"} +{"index": {"_index": "{indexname}"}} +{"id": "s100", "passage_text": "Garibaldian quickset", "imdb": 9.8, "actor": "jackie"} diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java index e6fb45d2a..afc545447 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java @@ -11,6 +11,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -785,6 +786,39 @@ protected void bulkAddDocuments(final String index, final String textField, fina assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); } + @SneakyThrows + protected void bulkIngest(final String ingestBulkPayload, final String pipeline) { + Map params = new HashMap<>(); + params.put("refresh", "true"); + if (Objects.nonNull(pipeline)) { + params.put("pipeline", pipeline); + } + Response response = makeRequest( + client(), + "POST", + "_bulk", + params, + toHttpEntity(ingestBulkPayload), + ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "Kibana")) + ); + Map map = XContentHelper.convertToMap( + XContentType.JSON.xContent(), + EntityUtils.toString(response.getEntity()), + false + ); + + int failedDocCount = 0; + for (Object item : ((List) map.get("items"))) { + Map> itemMap = (Map>) item; + if (itemMap.get("index").get("error") != null) { + failedDocCount++; + } + } + assertEquals(0, failedDocCount); + + assertEquals("_bulk failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); + } + /** * Parse the first returned hit from a search response as a map * @@ -924,6 +958,27 @@ protected String buildIndexConfiguration( final List keywordFields, final List dateFields, final int numberOfShards + ) { + return buildIndexConfiguration( + knnFieldConfigs, + nestedFields, + intFields, + Collections.emptyList(), + keywordFields, + dateFields, + numberOfShards + ); + } + + @SneakyThrows + protected String buildIndexConfiguration( + final List knnFieldConfigs, + final List nestedFields, + final List intFields, + final List floatFields, + final List keywordFields, + final List dateFields, + final int numberOfShards ) { XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() .startObject() @@ -964,6 +1019,10 @@ protected String buildIndexConfiguration( xContentBuilder.startObject(intField).field("type", "integer").endObject(); } + for (String floatField : floatFields) { + xContentBuilder.startObject(floatField).field("type", "float").endObject(); + } + for (String keywordField : keywordFields) { xContentBuilder.startObject(keywordField).field("type", "keyword").endObject(); } diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/util/TestUtils.java b/src/testFixtures/java/org/opensearch/neuralsearch/util/TestUtils.java index ab041c440..c10380e87 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/util/TestUtils.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/util/TestUtils.java @@ -304,6 +304,10 @@ public static void assertFetchResultScores(FetchSearchResult fetchSearchResult, } public static void assertHitResultsFromQuery(int expected, Map searchResponseAsMap) { + assertHitResultsFromQuery(expected, expected, searchResponseAsMap); + } + + public static void assertHitResultsFromQuery(int expected, int expectedTotal, Map searchResponseAsMap) { assertEquals(expected, getHitCount(searchResponseAsMap)); List> hitsNestedList = getNestedHits(searchResponseAsMap); @@ -321,7 +325,7 @@ public static void assertHitResultsFromQuery(int expected, Map s Map total = getTotalHits(searchResponseAsMap); assertNotNull(total.get("value")); - assertEquals(expected, total.get("value")); + assertEquals(expectedTotal, total.get("value")); assertNotNull(total.get("relation")); assertEquals(RELATION_EQUAL_TO, total.get("relation")); }