-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Text and Keyword aggregation integration tests #176
base: Integ-newDataTypeForTextAggregations
Are you sure you want to change the base?
Changes from 1 commit
dc48abe
2a8fbac
6be26ae
925a5ee
5643039
718210d
924f592
3e4317d
0222df6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.sql.sql; | ||
|
||
import org.junit.Test; | ||
import org.opensearch.sql.legacy.SQLIntegTestCase; | ||
import java.io.IOException; | ||
|
||
|
||
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TEXTKEYWORD; | ||
import static org.opensearch.sql.util.MatcherUtils.schema; | ||
import static org.opensearch.sql.util.MatcherUtils.verifySchema; | ||
|
||
public class TextTypeIT extends SQLIntegTestCase { | ||
|
||
|
||
@Override | ||
public void init() throws Exception { | ||
super.init(); | ||
loadIndex(Index.TEXTKEYWORD); | ||
} | ||
|
||
@Test | ||
public void textKeywordTest() throws IOException { | ||
var result = executeJdbcRequest(String.format("select typeText from %s", TEST_INDEX_TEXTKEYWORD)); | ||
verifySchema(result, | ||
schema("typeText", null, "text")); | ||
} | ||
|
||
@Test | ||
public void aggregateOnText() throws IOException { | ||
var result = executeJdbcRequest(String.format("select sum(int0) from %s GROUP BY typeText", TEST_INDEX_TEXTKEYWORD)); | ||
verifySchema(result, | ||
schema("sum(int0)", null, "integer")); | ||
} | ||
|
||
@Test | ||
public void aggregateOnKeyword() throws IOException { | ||
var result = executeJdbcRequest(String.format("select sum(int0) from %s GROUP BY typeKeyword", TEST_INDEX_TEXTKEYWORD)); | ||
verifySchema(result, | ||
schema("sum(int0)", null, "integer")); | ||
} | ||
|
||
@Test | ||
public void aggregateOnTextFieldData() throws IOException { | ||
var result = executeJdbcRequest(String.format("select sum(int0) from %s GROUP BY typeTextFieldData", TEST_INDEX_TEXTKEYWORD)); | ||
verifySchema(result, | ||
schema("sum(int0)", null, "integer")); | ||
} | ||
|
||
@Test | ||
public void aggregateOnKeywordFieldData() throws IOException { | ||
var result = executeJdbcRequest(String.format("select sum(int0) from %s GROUP BY typeKeywordFieldNoFieldData", TEST_INDEX_TEXTKEYWORD)); | ||
verifySchema(result, | ||
schema("sum(int0)", null, "integer")); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
{ | ||
"mappings" : { | ||
"properties" : { | ||
"typeKeyword" : { | ||
"type" : "keyword" | ||
}, | ||
"typeText" : { | ||
"type" : "text" | ||
}, | ||
"typeKeywordFieldNoFieldData" : { | ||
"type": "text", | ||
"fields": { | ||
"keyword": { | ||
"type": "keyword", | ||
"ignore_above": 256 | ||
} | ||
} }, | ||
"typeTextFieldData" : { | ||
"type": "text", | ||
"fielddata": true, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would be interesting to know how it works if we have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added in 2a8fbac. It passes. |
||
"fields": { | ||
"keyword": { | ||
"type": "keyword", | ||
"ignore_above": 256 | ||
} | ||
} | ||
}, | ||
"int0" : { | ||
"type": "integer" | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{"index": {}} | ||
{"typeKeyword": "key00", "typeText": "text00", "typeKeywordFieldData": "keyFD00", "typeTextFieldData": "textFD00", "int0": 0} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. none of these documents have text fields larger than 256, so we aren't testing the ignore_above sub-field. Can we do that? Can we create a field with ignore_above of around 10, and add data (with spaces) larger than 10 characters? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2a8fbac Added and it passes. |
||
{"index": {}} | ||
{"typeKeyword": "key01", "typeText": "text01", "typeKeywordFieldData": "keyFD01", "typeTextFieldData": "textFD01", "int0": 1} | ||
{"index": {}} | ||
{"typeKeyword": "key02", "typeText": "text02", "typeKeywordFieldData": "keyFD02", "typeTextFieldData": "textFD02", "int0": 2} | ||
{"index": {}} | ||
{"typeKeyword": "key03", "typeText": "text03", "typeKeywordFieldData": "keyFD03", "typeTextFieldData": "textFD03", "int0": 3} | ||
{"index": {}} | ||
{"typeKeyword": "key04", "typeText": "text04", "typeKeywordFieldData": "keyFD04", "typeTextFieldData": "textFD04", "int0": 4} | ||
{"index": {}} | ||
{"typeKeyword": "key05", "typeText": "text05", "typeKeywordFieldData": "keyFD05", "typeTextFieldData": "textFD05", "int0": 5} | ||
{"index": {}} | ||
{"typeKeyword": "key06", "typeText": "text06", "typeKeywordFieldData": "keyFD06", "typeTextFieldData": "textFD06", "int0": 6} | ||
{"index": {}} | ||
{"typeKeyword": "key07", "typeText": "text07", "typeKeywordFieldData": "keyFD07", "typeTextFieldData": "textFD07", "int0": 7} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there any other tests that you tried? These only look like aggregation tests, but it would be good to know if some of the following would also work:
WHERE field LIKE "keyFD??"
WHERE wildcard("field", "keyFD??")
SELECT field LIKE "keyFD??"
and maybe a couple of string-like functions:
SELECT LOCATE("FD", field)
SELECT SUBSTRING(field, 3, 2)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also try
SELECT POSITION(substring IN field)
since it seems to fail for text fields (as Margarit demonstrated).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SELECT POSITION(substring IN field)
Passed:
Failed:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you check selectPositionTextDataFieldNoFields - it's giving you a parser error.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Otherwise, this looks exclusively like an issue with aggregation on text fields.
Strangely, I thought POSITION was going to fail on text fields, since it was failing for Margarit earlier.