From 45ff4f59ac40d1b3f725effc4d3e79ff4f39d24f Mon Sep 17 00:00:00 2001 From: Muneer Kolarkunnu <33829651+akolarkunnu@users.noreply.github.com> Date: Fri, 15 Nov 2024 01:23:41 +0530 Subject: [PATCH 1/7] [FEATURE]Improve test coverage for RemoteModel.java (#3205) * [FEATURE]Improve test coverage for RemoteModel.java Added new tests for missing coverage. Mainly coverage was missing for catching exceptions in the methods initModel() and asyncPredict(). Also renamed some tests to match with testing methods. Resolves #1382 Signed-off-by: Abdul Muneer Kolarkunnu * [FEATURE]Improve test coverage for RemoteModel.java Added new tests for missing coverage. Mainly coverage was missing for catching exceptions in the methods initModel() and asyncPredict(). Also renamed some tests to match with testing methods. Resolves #1382 Signed-off-by: Abdul Muneer Kolarkunnu * [FEATURE]Improve test coverage for RemoteModel.java Added new tests for missing coverage. Mainly coverage was missing for catching exceptions in the methods initModel() and asyncPredict(). Also renamed some tests to match with testing methods. Resolves #1382 Signed-off-by: Abdul Muneer Kolarkunnu * [FEATURE]Improve test coverage for RemoteModel.java Added new tests for missing coverage. Mainly coverage was missing for catching exceptions in the methods initModel() and asyncPredict(). Also renamed some tests to match with testing methods. 
Resolves #1382 Signed-off-by: Abdul Muneer Kolarkunnu --------- Signed-off-by: Abdul Muneer Kolarkunnu --- .../algorithms/remote/RemoteModelTest.java | 92 +++++++++++++++++-- 1 file changed, 83 insertions(+), 9 deletions(-) diff --git a/ml-algorithms/src/test/java/org/opensearch/ml/engine/algorithms/remote/RemoteModelTest.java b/ml-algorithms/src/test/java/org/opensearch/ml/engine/algorithms/remote/RemoteModelTest.java index c14b329586..075019834c 100644 --- a/ml-algorithms/src/test/java/org/opensearch/ml/engine/algorithms/remote/RemoteModelTest.java +++ b/ml-algorithms/src/test/java/org/opensearch/ml/engine/algorithms/remote/RemoteModelTest.java @@ -14,6 +14,7 @@ import static org.mockito.Mockito.when; import java.util.Arrays; +import java.util.Collections; import java.util.Map; import org.junit.Assert; @@ -23,6 +24,7 @@ import org.junit.rules.ExpectedException; import org.mockito.ArgumentCaptor; import org.mockito.Mock; +import org.mockito.MockedStatic; import org.mockito.MockitoAnnotations; import org.opensearch.core.action.ActionListener; import org.opensearch.ml.common.MLModel; @@ -30,14 +32,18 @@ import org.opensearch.ml.common.connector.ConnectorAction; import org.opensearch.ml.common.connector.ConnectorProtocols; import org.opensearch.ml.common.connector.HttpConnector; +import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet; +import org.opensearch.ml.common.exception.MLException; import org.opensearch.ml.common.input.MLInput; import org.opensearch.ml.common.transport.MLTaskResponse; +import org.opensearch.ml.engine.MLEngineClassLoader; +import org.opensearch.ml.engine.MLStaticMockBase; import org.opensearch.ml.engine.encryptor.Encryptor; import org.opensearch.ml.engine.encryptor.EncryptorImpl; import com.google.common.collect.ImmutableMap; -public class RemoteModelTest { +public class RemoteModelTest extends MLStaticMockBase { @Mock MLInput mlInput; @@ -45,6 +51,9 @@ public class RemoteModelTest { @Mock MLModel mlModel; + @Mock + 
RemoteConnectorExecutor remoteConnectorExecutor; + @Rule public ExpectedException exceptionRule = ExpectedException.none(); @@ -73,7 +82,7 @@ public void test_predict_throw_IllegalStateException() { } @Test - public void predict_NullConnectorExecutor() { + public void asyncPredict_NullConnectorExecutor() { ActionListener actionListener = mock(ActionListener.class); remoteModel.asyncPredict(mlInput, actionListener); ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); @@ -86,7 +95,18 @@ public void predict_NullConnectorExecutor() { } @Test - public void predict_ModelDeployed_WrongInput() { + public void asyncPredict_ModelDeployed_WrongInput() { + asyncPredict_ModelDeployed_WrongInput("pre_process_function not defined in connector"); + } + + @Test + public void asyncPredict_With_RemoteInferenceInputDataSet() { + when(mlInput.getInputDataset()).thenReturn( + new RemoteInferenceInputDataSet(Collections.emptyMap(), ConnectorAction.ActionType.BATCH_PREDICT)); + asyncPredict_ModelDeployed_WrongInput("no BATCH_PREDICT action found"); + } + + private void asyncPredict_ModelDeployed_WrongInput(String expExceptionMessage) { Connector connector = createConnector(ImmutableMap.of("Authorization", "Bearer ${credential.key}")); when(mlModel.getConnector()).thenReturn(connector); remoteModel.initModel(mlModel, ImmutableMap.of(), encryptor); @@ -95,16 +115,71 @@ public void predict_ModelDeployed_WrongInput() { ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); verify(actionListener).onFailure(argumentCaptor.capture()); assert argumentCaptor.getValue() instanceof RuntimeException; - assertEquals("pre_process_function not defined in connector", argumentCaptor.getValue().getMessage()); + assertEquals(expExceptionMessage, argumentCaptor.getValue().getMessage()); } @Test - public void initModel_RuntimeException() { - exceptionRule.expect(IllegalArgumentException.class); - exceptionRule.expectMessage("Tag mismatch!"); + public void 
asyncPredict_Failure_With_RuntimeException() { + asyncPredict_Failure_With_Throwable( + new RuntimeException("Remote Connection Exception!"), + RuntimeException.class, + "Remote Connection Exception!" + ); + } + + @Test + public void asyncPredict_Failure_With_Throwable() { + asyncPredict_Failure_With_Throwable( + new Error("Remote Connection Error!"), + MLException.class, + "java.lang.Error: Remote Connection Error!" + ); + } + + private void asyncPredict_Failure_With_Throwable( + Throwable actualException, + Class expExceptionClass, + String expExceptionMessage + ) { + ActionListener actionListener = mock(ActionListener.class); + doThrow(actualException) + .when(remoteConnectorExecutor) + .executeAction(ConnectorAction.ActionType.PREDICT.toString(), mlInput, actionListener); + try (MockedStatic loader = mockStatic(MLEngineClassLoader.class)) { + Connector connector = createConnector(ImmutableMap.of("Authorization", "Bearer ${credential.key}")); + when(mlModel.getConnector()).thenReturn(connector); + loader + .when(() -> MLEngineClassLoader.initInstance(connector.getProtocol(), connector, Connector.class)) + .thenReturn(remoteConnectorExecutor); + remoteModel.initModel(mlModel, ImmutableMap.of(), encryptor); + remoteModel.asyncPredict(mlInput, actionListener); + ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); + verify(actionListener).onFailure(argumentCaptor.capture()); + assert expExceptionClass.isInstance(argumentCaptor.getValue()); + assertEquals(expExceptionMessage, argumentCaptor.getValue().getMessage()); + } + } + + @Test + public void initModel_Failure_With_RuntimeException() { + initModel_Failure_With_Throwable(new IllegalArgumentException("Tag mismatch!"), IllegalArgumentException.class, "Tag mismatch!"); + } + + @Test + public void initModel_Failure_With_Throwable() { + initModel_Failure_With_Throwable(new Error("Decryption Error!"), MLException.class, "Decryption Error!"); + } + + private void initModel_Failure_With_Throwable( + 
Throwable actualException, + Class expExcepClass, + String expExceptionMessage + ) { + exceptionRule.expect(expExcepClass); + exceptionRule.expectMessage(expExceptionMessage); Connector connector = createConnector(null); when(mlModel.getConnector()).thenReturn(connector); - doThrow(new IllegalArgumentException("Tag mismatch!")).when(encryptor).decrypt(any()); + doThrow(actualException).when(encryptor).decrypt(any()); remoteModel.initModel(mlModel, ImmutableMap.of(), encryptor); } @@ -129,7 +204,6 @@ public void initModel_WithHeader() { Assert.assertNotNull(executor.getConnector().getDecryptedHeaders()); assertEquals(1, executor.getConnector().getDecryptedHeaders().size()); assertEquals("Bearer test_api_key", executor.getConnector().getDecryptedHeaders().get("Authorization")); - remoteModel.close(); Assert.assertNull(remoteModel.getConnectorExecutor()); } From 9c1b8a8c1e28ab8bdaf507cb71799c4809c3d8c6 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 15 Nov 2024 16:18:22 -0500 Subject: [PATCH 2/7] Enable custom start commands and options to resolve GHA issues (#3223) Signed-off-by: Peter Zhu --- .github/workflows/CI-workflow.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/CI-workflow.yml b/.github/workflows/CI-workflow.yml index 14632e46db..7c38b23eab 100644 --- a/.github/workflows/CI-workflow.yml +++ b/.github/workflows/CI-workflow.yml @@ -27,8 +27,6 @@ jobs: strategy: matrix: java: [21] - env: - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true name: Build and Test MLCommons Plugin on linux if: github.repository == 'opensearch-project/ml-commons' @@ -41,21 +39,23 @@ jobs: # this image tag is subject to change as more dependencies and updates will arrive over time image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} # need to switch to root so that github actions can install runner binary on container without permission issues. 
- options: --user root + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} steps: + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} - name: Setup Java ${{ matrix.java }} uses: actions/setup-java@v1 with: java-version: ${{ matrix.java }} - - uses: aws-actions/configure-aws-credentials@v2 + - uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ secrets.ML_ROLE }} aws-region: us-west-2 - name: Checkout MLCommons - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} @@ -77,7 +77,7 @@ jobs: echo "build-test-linux=$plugin" >> $GITHUB_OUTPUT - name: Upload Coverage Report - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v4 with: flags: ml-commons token: ${{ secrets.CODECOV_TOKEN }} @@ -101,7 +101,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: aws-actions/configure-aws-credentials@v2 + - uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ secrets.ML_ROLE }} aws-region: us-west-2 @@ -112,11 +112,11 @@ jobs: java-version: ${{ matrix.java }} - name: Checkout MLCommons - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/download-artifact@v4.1.7 + - uses: actions/download-artifact@v4 with: name: ml-plugin-linux-${{ matrix.java }} @@ -181,7 +181,7 @@ jobs: fi - name: Upload Coverage Report - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: flags: ml-commons token: ${{ secrets.CODECOV_TOKEN }} @@ -202,14 +202,14 @@ jobs: with: java-version: ${{ matrix.java }} - - uses: aws-actions/configure-aws-credentials@v2 + - uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ secrets.ML_ROLE }} aws-region: us-west-2 # ml-commons - name: Checkout MLCommons - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} From 
7041c225875709719262853064ae7465bc4cd042 Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Mon, 18 Nov 2024 10:33:01 -0800 Subject: [PATCH 3/7] adding multi-modal pre-processor for cohere (#3219) * adding multi-modal pre-processor for cohere Signed-off-by: Dhrubo Saha * added javadoc Signed-off-by: Dhrubo Saha --------- Signed-off-by: Dhrubo Saha --- .../connector/MLPreProcessFunction.java | 5 ++ ...MultiModalEmbeddingPreProcessFunction.java | 50 +++++++++++ ...iModalEmbeddingPreProcessFunctionTest.java | 89 +++++++++++++++++++ ...iModalConnectorPreProcessFunctionTest.java | 2 +- 4 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java create mode 100644 common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java diff --git a/common/src/main/java/org/opensearch/ml/common/connector/MLPreProcessFunction.java b/common/src/main/java/org/opensearch/ml/common/connector/MLPreProcessFunction.java index 3a5a3427a8..723da8c07d 100644 --- a/common/src/main/java/org/opensearch/ml/common/connector/MLPreProcessFunction.java +++ b/common/src/main/java/org/opensearch/ml/common/connector/MLPreProcessFunction.java @@ -11,6 +11,7 @@ import org.opensearch.ml.common.connector.functions.preprocess.BedrockEmbeddingPreProcessFunction; import org.opensearch.ml.common.connector.functions.preprocess.CohereEmbeddingPreProcessFunction; +import org.opensearch.ml.common.connector.functions.preprocess.CohereMultiModalEmbeddingPreProcessFunction; import org.opensearch.ml.common.connector.functions.preprocess.CohereRerankPreProcessFunction; import org.opensearch.ml.common.connector.functions.preprocess.MultiModalConnectorPreProcessFunction; import org.opensearch.ml.common.connector.functions.preprocess.OpenAIEmbeddingPreProcessFunction; @@ -21,6 +22,7 @@ public class MLPreProcessFunction { 
private static final Map> PRE_PROCESS_FUNCTIONS = new HashMap<>(); public static final String TEXT_DOCS_TO_COHERE_EMBEDDING_INPUT = "connector.pre_process.cohere.embedding"; + public static final String IMAGE_TO_COHERE_MULTI_MODAL_EMBEDDING_INPUT = "connector.pre_process.cohere.multimodal_embedding"; public static final String TEXT_DOCS_TO_OPENAI_EMBEDDING_INPUT = "connector.pre_process.openai.embedding"; public static final String TEXT_DOCS_TO_BEDROCK_EMBEDDING_INPUT = "connector.pre_process.bedrock.embedding"; public static final String TEXT_IMAGE_TO_BEDROCK_EMBEDDING_INPUT = "connector.pre_process.bedrock.multimodal_embedding"; @@ -37,7 +39,10 @@ public class MLPreProcessFunction { BedrockEmbeddingPreProcessFunction bedrockEmbeddingPreProcessFunction = new BedrockEmbeddingPreProcessFunction(); CohereRerankPreProcessFunction cohereRerankPreProcessFunction = new CohereRerankPreProcessFunction(); MultiModalConnectorPreProcessFunction multiModalEmbeddingPreProcessFunction = new MultiModalConnectorPreProcessFunction(); + CohereMultiModalEmbeddingPreProcessFunction cohereMultiModalEmbeddingPreProcessFunction = + new CohereMultiModalEmbeddingPreProcessFunction(); PRE_PROCESS_FUNCTIONS.put(TEXT_DOCS_TO_COHERE_EMBEDDING_INPUT, cohereEmbeddingPreProcessFunction); + PRE_PROCESS_FUNCTIONS.put(IMAGE_TO_COHERE_MULTI_MODAL_EMBEDDING_INPUT, cohereMultiModalEmbeddingPreProcessFunction); PRE_PROCESS_FUNCTIONS.put(TEXT_IMAGE_TO_BEDROCK_EMBEDDING_INPUT, multiModalEmbeddingPreProcessFunction); PRE_PROCESS_FUNCTIONS.put(TEXT_DOCS_TO_OPENAI_EMBEDDING_INPUT, openAIEmbeddingPreProcessFunction); PRE_PROCESS_FUNCTIONS.put(TEXT_DOCS_TO_DEFAULT_EMBEDDING_INPUT, openAIEmbeddingPreProcessFunction); diff --git a/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java new file mode 100644 index 
0000000000..80c615cb10 --- /dev/null +++ b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java @@ -0,0 +1,50 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.ml.common.connector.functions.preprocess; + +import static org.opensearch.ml.common.utils.StringUtils.convertScriptStringToJsonString; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.opensearch.ml.common.dataset.TextDocsInputDataSet; +import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet; +import org.opensearch.ml.common.input.MLInput; + +public class CohereMultiModalEmbeddingPreProcessFunction extends ConnectorPreProcessFunction { + + public CohereMultiModalEmbeddingPreProcessFunction() { + this.returnDirectlyForRemoteInferenceInput = true; + } + + @Override + public void validate(MLInput mlInput) { + validateTextDocsInput(mlInput); + List docs = ((TextDocsInputDataSet) mlInput.getInputDataset()).getDocs(); + if (docs.isEmpty() || (docs.size() == 1 && docs.getFirst() == null)) { + throw new IllegalArgumentException("No image provided"); + } + } + + @Override + public RemoteInferenceInputDataSet process(MLInput mlInput) { + TextDocsInputDataSet inputData = (TextDocsInputDataSet) mlInput.getInputDataset(); + Map parametersMap = new HashMap<>(); + + /** + * Cohere multi-modal model expects either image or texts, not both. + * For image, customer can use this pre-process function. For texts, customer can use + * connector.pre_process.cohere.embedding + * Cohere expects An array of image data URIs for the model to embed. Maximum number of images per call is 1. 
+ */ + parametersMap.put("images", inputData.getDocs().getFirst()); + return RemoteInferenceInputDataSet + .builder() + .parameters(convertScriptStringToJsonString(Map.of("parameters", parametersMap))) + .build(); + } +} diff --git a/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java new file mode 100644 index 0000000000..e16f56287d --- /dev/null +++ b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java @@ -0,0 +1,89 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.ml.common.connector.functions.preprocess; + +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.opensearch.ml.common.FunctionName; +import org.opensearch.ml.common.dataset.TextDocsInputDataSet; +import org.opensearch.ml.common.dataset.TextSimilarityInputDataSet; +import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet; +import org.opensearch.ml.common.input.MLInput; + +public class CohereMultiModalEmbeddingPreProcessFunctionTest { + @Rule + public ExpectedException exceptionRule = ExpectedException.none(); + + CohereMultiModalEmbeddingPreProcessFunction function; + + TextSimilarityInputDataSet textSimilarityInputDataSet; + TextDocsInputDataSet textDocsInputDataSet; + RemoteInferenceInputDataSet remoteInferenceInputDataSet; + + MLInput textEmbeddingInput; + MLInput textSimilarityInput; + MLInput remoteInferenceInput; + + @Before + public void setUp() { + function = new CohereMultiModalEmbeddingPreProcessFunction(); + 
textSimilarityInputDataSet = TextSimilarityInputDataSet.builder().queryText("test").textDocs(List.of("hello")).build(); + textDocsInputDataSet = TextDocsInputDataSet.builder().docs(List.of("imageString")).build(); + remoteInferenceInputDataSet = RemoteInferenceInputDataSet.builder().parameters(Map.of("images", "value2")).build(); + + textEmbeddingInput = MLInput.builder().algorithm(FunctionName.TEXT_EMBEDDING).inputDataset(textDocsInputDataSet).build(); + textSimilarityInput = MLInput.builder().algorithm(FunctionName.TEXT_SIMILARITY).inputDataset(textSimilarityInputDataSet).build(); + remoteInferenceInput = MLInput.builder().algorithm(FunctionName.REMOTE).inputDataset(remoteInferenceInputDataSet).build(); + } + + @Test + public void testProcess_whenNullInput_expectIllegalArgumentException() { + exceptionRule.expect(IllegalArgumentException.class); + exceptionRule.expectMessage("Preprocess function input can't be null"); + function.apply(null); + } + + @Test + public void testProcess_whenWrongInput_expectIllegalArgumentException() { + exceptionRule.expect(IllegalArgumentException.class); + exceptionRule.expectMessage("This pre_process_function can only support TextDocsInputDataSet"); + function.apply(textSimilarityInput); + } + + @Test + public void testProcess_whenCorrectInput_expectCorrectOutput() { + MLInput mlInput = MLInput.builder().algorithm(FunctionName.TEXT_EMBEDDING).inputDataset(textDocsInputDataSet).build(); + RemoteInferenceInputDataSet dataSet = function.apply(mlInput); + assertEquals(1, dataSet.getParameters().size()); + assertEquals("imageString", dataSet.getParameters().get("images")); + + } + + @Test + public void testProcess_whenInputTextIsnull_expectIllegalArgumentException() { + exceptionRule.expect(IllegalArgumentException.class); + exceptionRule.expectMessage("No image provided"); + List docs = new ArrayList<>(); + docs.add(null); + TextDocsInputDataSet textDocsInputDataSet1 = TextDocsInputDataSet.builder().docs(docs).build(); + MLInput 
mlInput = MLInput.builder().algorithm(FunctionName.TEXT_EMBEDDING).inputDataset(textDocsInputDataSet1).build(); + RemoteInferenceInputDataSet dataSet = function.apply(mlInput); + } + + @Test + public void testProcess_whenRemoteInferenceInput_expectRemoteInferenceInputDataSet() { + RemoteInferenceInputDataSet dataSet = function.apply(remoteInferenceInput); + assertEquals(remoteInferenceInputDataSet, dataSet); + } +} diff --git a/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/MultiModalConnectorPreProcessFunctionTest.java b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/MultiModalConnectorPreProcessFunctionTest.java index 6ea8da20f9..4442ff7339 100644 --- a/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/MultiModalConnectorPreProcessFunctionTest.java +++ b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/MultiModalConnectorPreProcessFunctionTest.java @@ -39,7 +39,7 @@ public class MultiModalConnectorPreProcessFunctionTest { @Before public void setUp() { function = new MultiModalConnectorPreProcessFunction(); - textSimilarityInputDataSet = TextSimilarityInputDataSet.builder().queryText("test").textDocs(Arrays.asList("hello")).build(); + textSimilarityInputDataSet = TextSimilarityInputDataSet.builder().queryText("test").textDocs(List.of("hello")).build(); textDocsInputDataSet = TextDocsInputDataSet.builder().docs(Arrays.asList("hello", "world")).build(); remoteInferenceInputDataSet = RemoteInferenceInputDataSet .builder() From 4f21953157cd4e04672e034dab5c9b401a2c07a2 Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Mon, 18 Nov 2024 11:48:05 -0800 Subject: [PATCH 4/7] getFirst is not allowed in java 17 (#3226) Signed-off-by: Dhrubo Saha --- .../CohereMultiModalEmbeddingPreProcessFunction.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java index 80c615cb10..7d25fa7202 100644 --- a/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java +++ b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java @@ -25,9 +25,10 @@ public CohereMultiModalEmbeddingPreProcessFunction() { public void validate(MLInput mlInput) { validateTextDocsInput(mlInput); List docs = ((TextDocsInputDataSet) mlInput.getInputDataset()).getDocs(); - if (docs.isEmpty() || (docs.size() == 1 && docs.getFirst() == null)) { + if (docs == null || docs.isEmpty() || (docs.size() == 1 && docs.get(0) == null)) { throw new IllegalArgumentException("No image provided"); } + } @Override @@ -41,7 +42,7 @@ public RemoteInferenceInputDataSet process(MLInput mlInput) { * connector.pre_process.cohere.embedding * Cohere expects An array of image data URIs for the model to embed. Maximum number of images per call is 1. 
*/ - parametersMap.put("images", inputData.getDocs().getFirst()); + parametersMap.put("images", inputData.getDocs().get(0)); return RemoteInferenceInputDataSet .builder() .parameters(convertScriptStringToJsonString(Map.of("parameters", parametersMap))) From f9cbf15edcf885fa3f64dbb39a467c29604bbc5c Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:12:42 -0800 Subject: [PATCH 5/7] adding blue print doc for cohere multi-modal model (#3229) (#3232) * adding blue print doc Signed-off-by: Dhrubo Saha * addressed comments Signed-off-by: Dhrubo Saha * addressed comment Signed-off-by: Dhrubo Saha --------- Signed-off-by: Dhrubo Saha (cherry picked from commit de301b739b2bdc590cfbdb28682a0b53c5988cdb) Co-authored-by: Dhrubo Saha --- ...MultiModalEmbeddingPreProcessFunction.java | 6 +- ...iModalEmbeddingPreProcessFunctionTest.java | 2 +- .../cohere_connector_embedding_blueprint.md | 8 +- ...ere_connector_image_embedding_blueprint.md | 324 ++++++++++++++++++ 4 files changed, 332 insertions(+), 8 deletions(-) create mode 100644 docs/remote_inference_blueprints/cohere_connector_image_embedding_blueprint.md diff --git a/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java index 7d25fa7202..31180d7ef8 100644 --- a/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java +++ b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java @@ -25,7 +25,7 @@ public CohereMultiModalEmbeddingPreProcessFunction() { public void validate(MLInput mlInput) { validateTextDocsInput(mlInput); List docs = ((TextDocsInputDataSet) mlInput.getInputDataset()).getDocs(); - 
if (docs == null || docs.isEmpty() || (docs.size() == 1 && docs.get(0) == null)) { + if (docs == null || docs.isEmpty() || docs.get(0) == null) { throw new IllegalArgumentException("No image provided"); } @@ -34,7 +34,7 @@ public void validate(MLInput mlInput) { @Override public RemoteInferenceInputDataSet process(MLInput mlInput) { TextDocsInputDataSet inputData = (TextDocsInputDataSet) mlInput.getInputDataset(); - Map parametersMap = new HashMap<>(); + Map parametersMap = new HashMap<>(); /** * Cohere multi-modal model expects either image or texts, not both. @@ -42,7 +42,7 @@ public RemoteInferenceInputDataSet process(MLInput mlInput) { * connector.pre_process.cohere.embedding * Cohere expects An array of image data URIs for the model to embed. Maximum number of images per call is 1. */ - parametersMap.put("images", inputData.getDocs().get(0)); + parametersMap.put("images", inputData.getDocs()); return RemoteInferenceInputDataSet .builder() .parameters(convertScriptStringToJsonString(Map.of("parameters", parametersMap))) diff --git a/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java index e16f56287d..2334811690 100644 --- a/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java +++ b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java @@ -66,7 +66,7 @@ public void testProcess_whenCorrectInput_expectCorrectOutput() { MLInput mlInput = MLInput.builder().algorithm(FunctionName.TEXT_EMBEDDING).inputDataset(textDocsInputDataSet).build(); RemoteInferenceInputDataSet dataSet = function.apply(mlInput); assertEquals(1, dataSet.getParameters().size()); - assertEquals("imageString", 
dataSet.getParameters().get("images")); + assertEquals("[\"imageString\"]", dataSet.getParameters().get("images")); } diff --git a/docs/remote_inference_blueprints/cohere_connector_embedding_blueprint.md b/docs/remote_inference_blueprints/cohere_connector_embedding_blueprint.md index fe910d0c79..4386251c00 100644 --- a/docs/remote_inference_blueprints/cohere_connector_embedding_blueprint.md +++ b/docs/remote_inference_blueprints/cohere_connector_embedding_blueprint.md @@ -1,6 +1,6 @@ ### Cohere Embedding Connector Blueprint: -This blueprint will show you how to connect a Cohere embedding model to your Opensearch cluster, including creating a k-nn index and your own Embedding pipeline. You will require a Cohere API key to create a connector. +This blueprint will show you how to connect a Cohere embedding model to your OpenSearch cluster, including creating a k-nn index and your own Embedding pipeline. You will require a Cohere API key to create a connector. Cohere currently offers the following Embedding models (with model name and embedding dimensions). Note that only the following have been tested with the blueprint guide. @@ -97,7 +97,7 @@ The last step is to deploy your model. Use the `model_id` returned by the regist POST /_plugins/_ml/models//_deploy ``` -This will once again spawn a task to deploy your Model, with a response that will look like: +This will once again spawn a task to deploy your model, with a response that will look like: ```json { @@ -113,11 +113,11 @@ You can run the GET tasks request again to verify the status. GET /_plugins/_ml/tasks/ ``` -Once this is complete, your Model is deployed and ready! +Once this is complete, your model is deployed and ready! ##### 1e. 
Test model -You can try this request to test that the Model behaves correctly: +You can try this request to test that the model behaves correctly: ```json POST /_plugins/_ml/models//_predict diff --git a/docs/remote_inference_blueprints/cohere_connector_image_embedding_blueprint.md b/docs/remote_inference_blueprints/cohere_connector_image_embedding_blueprint.md new file mode 100644 index 0000000000..06af2c9b90 --- /dev/null +++ b/docs/remote_inference_blueprints/cohere_connector_image_embedding_blueprint.md @@ -0,0 +1,324 @@ +### Cohere Image Embedding Connector Blueprint: + +This blueprint will show you how to connect a Cohere multi-modal embedding model to your OpenSearch cluster, including creating a k-nn index and your own Embedding pipeline. You will require a Cohere API key to create a connector. + +Cohere currently offers the following Embedding models (with model name and embedding dimensions). Note that only the following have been tested with the blueprint guide. + +- embed-english-v3.0 1024 +- embed-english-v2.0 4096 + +See [Cohere's /embed API docs](https://docs.cohere.com/reference/embed) for more details. + +#### 1. Create a connector and model group + +##### 1a. Register model group + +```json +POST /_plugins/_ml/model_groups/_register + +{ + "name": "cohere_model_group", + "description": "Your Cohere model group" +} +``` + +This request response will return the `model_group_id`, note it down. + +##### 1b. Create a connector + +See above for all the values the `parameters > model` parameter can take. 
+ +```json +POST /_plugins/_ml/connectors/_create +{ + "name": "Cohere Embed Model", + "description": "The connector to Cohere's public embed API", + "version": "1", + "protocol": "http", + "credential": { + "cohere_key": "" + }, + "parameters": { + "model": "", // Choose a Model from the provided list above + "input_type":"image", + "truncate": "END" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "https://api.cohere.ai/v1/embed", + "headers": { + "Authorization": "Bearer ${credential.cohere_key}", + "Request-Source": "unspecified:opensearch" + }, + "request_body": "{ \"images\": ${parameters.images}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }", + "pre_process_function": "connector.pre_process.cohere.multimodal_embedding", + "post_process_function": "connector.post_process.cohere.embedding" + } + ] +} +``` + +This request response will return the `connector_id`, note it down. + +##### 1c. Register a model with your connector + +You can now register your model with the `model_group_id` and `connector_id` created from the previous steps. + +```json +POST /_plugins/_ml/models/_register +Content-Type: application/json + +{ + "name": "Cohere Embed Model", + "function_name": "remote", + "model_group_id": "", + "description": "Your Cohere Embedding Model", + "connector_id": "" +} +``` + +This will create a registration task, the response should look like: + +```json +{ + "task_id": "9bXpRY0BRil1qhQaUK-u", + "status": "CREATED", + "model_id": "9rXpRY0BRil1qhQaUK_8" +} +``` + +##### 1d. Deploy model + +The last step is to deploy your model. 
Use the `model_id` returned by the registration request, and run: + +```json +POST /_plugins/_ml/models//_deploy +``` + +This will once again spawn a task to deploy your model, with a response that will look like: + +```json +{ + "task_id": "97XrRY0BRil1qhQaQK_c", + "task_type": "DEPLOY_MODEL", + "status": "COMPLETED" +} +``` + +You can run a GET tasks request with the returned `task_id` to verify the status. + +```json +GET /_plugins/_ml/tasks/ +``` + +Once this is complete, your model is deployed and ready! + +##### 1e. Test model + +You can try this request to test that the model behaves correctly: + +```json +POST /_plugins/_ml/models//_predict +{ + "parameters": { + "images": [""] + } +} +``` + +It should return a response similar to this: + +```json +{ + "inference_results": [ + { + "output": [ + { + "name": "sentence_embedding", + "data_type": "FLOAT32", + "shape": [ + 1024 + ], + "data": [ + -0.0024547577, + 0.0062217712, + -0.01675415, + -0.020736694, + -0.020263672, + ... ... + 0.038635254 + ] + } + ], + "status_code": 200 + } + ] +} +``` + +#### (Optional) 2. Setup k-NN index and ingestion pipeline + +##### 2a. Create your pipeline + +It is important that the `field_map` parameter contains all the document fields you'd like to embed as a vector. Each key is the name of a document field to embed, and its value is the name of the field that will store the resulting embedding. + +```json +PUT /_ingest/pipeline/cohere-ingest-pipeline +{ + "description": "Test Cohere Embedding pipeline", + "processors": [ + { + "text_embedding": { + "model_id": "", + "field_map": { + "image_base64": "image_embedding" + } + } + } + ] +} +``` + +Sample response: + +```json +{ + "acknowledged": true +} +``` + +##### 2b. Create a k-NN index + +Here, `cohere-nlp-index` is the name of your index; you can change it as needed.
+ +```json +PUT /cohere-nlp-index +{ + "settings": { + "index.knn": true, + "default_pipeline": "cohere-ingest-pipeline" + }, + "mappings": { + "properties": { + "id": { + "type": "text" + }, + "image_embedding": { + "type": "knn_vector", + "dimension": 1024, + "method": { + "engine": "lucene", + "space_type": "l2", + "name": "hnsw", + "parameters": {} + } + }, + "image_base64": { + "type": "text" + } + } + } +} +``` + +Sample response: + +```json +{ + "acknowledged": true, + "shards_acknowledged": true, + "index": "cohere-nlp-index" +} +``` + +##### 2c. Testing the index and pipeline + +First, you can insert a record: + +```json +PUT /cohere-nlp-index/_doc/1 +{ + "image_base64": "", + "id": "c1" +} +``` + +Sample response: + +```json +{ + "_index": "cohere-nlp-index", + "_id": "1", + "_version": 1, + "result": "created", + "_shards": { + "total": 2, + "successful": 1, + "failed": 0 + }, + "_seq_no": 0, + "_primary_term": 1 +} +``` + +The last step is to check that the embeddings were properly created. Notice that the embedding field created corresponds to the `field_map` mapping you defined in step 2a. + +```json +GET /cohere-nlp-index/_search + +{ + "query": { + "match_all": {} + } +} +``` + +Sample response: + +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 1, + "hits": [ + { + "_index": "cohere-nlp-index", + "_id": "1", + "_score": 1, + "_source": { + "image_base64": "", + "image_embedding": [ + 0.02494812, + -0.009391785, + -0.015716553, + -0.051849365, + -0.015930176, + -0.024734497, + -0.028518677, + -0.008323669, + -0.008323669, + ............. + + ], + "id": "c1" + } + } + ] + } +} +``` + +Congratulations! You've successfully created your ingestion pipeline.
From 78a304a95d546db31139a135a1f031890a55ae72 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 26 Nov 2024 16:59:06 -0800 Subject: [PATCH 6/7] [Enhancement] Fetch system index mappings from json file instead of string constants (#3153) * feat(index mappings): fetch mappings and version from json file instead of string constants Signed-off-by: Pavan Yekbote * refactor: changing exception being thrown Signed-off-by: Pavan Yekbote * chore: remove unused file Signed-off-by: Pavan Yekbote * chore: fix typo in comment Signed-off-by: Pavan Yekbote * chore: adding new line at the end of files Signed-off-by: Pavan Yekbote * feat: add test cases Signed-off-by: Pavan Yekbote * fix: remove test code Signed-off-by: Pavan Yekbote * fix(test): in main the versions were not updated appropriately Signed-off-by: Pavan Yekbote * refactor: move mapping templates under common module Signed-off-by: Pavan Yekbote * refactor: ensure that conversationindexconstants reference mlindex enums rather than use their own mappings Signed-off-by: Pavan Yekbote * refactor: update comment Signed-off-by: Pavan Yekbote * refactor: rename dir from mappings to index-mappings Signed-off-by: Pavan Yekbote * fix: add null checks Signed-off-by: Pavan Yekbote * fix: adding dependencies for testing Signed-off-by: Pavan Yekbote * fix(test): compare json object rather than strings to avoid eol character issue Signed-off-by: Pavan Yekbote * refactor: combine if statements into single check Signed-off-by: Pavan Yekbote * refactoring: null handling + clean code Signed-off-by: Pavan Yekbote * spotless apply Signed-off-by: Pavan Yekbote --------- Signed-off-by: Pavan Yekbote --- common/build.gradle | 1 + .../org/opensearch/ml/common/CommonValue.java | 539 +----------------- .../org/opensearch/ml/common}/MLIndex.java | 71 ++- .../ConversationalIndexConstants.java | 75 +-- .../ml/common/utils/IndexUtils.java | 43 ++ .../ml/common/utils/StringUtils.java | 23 +- .../resources/index-mappings/ml-agent.json | 45 ++ 
.../resources/index-mappings/ml-config.json | 24 + .../index-mappings/ml-connector.json | 95 +++ .../index-mappings/ml-controller.json | 10 + .../index-mappings/ml-memory-message.json | 35 ++ .../index-mappings/ml-memory-meta.json | 27 + .../index-mappings/ml-model-group.json | 83 +++ .../resources/index-mappings/ml-model.json | 243 ++++++++ .../resources/index-mappings/ml-task.json | 86 +++ .../ml/common/utils/IndexUtilsTest.java | 110 ++++ .../test-mapping-malformed.json | 13 + .../index-mappings/test-mapping.json | 16 + memory/build.gradle | 1 + .../MetricsCorrelation.java | 4 +- .../ml/engine/indices/MLIndicesHandler.java | 1 + .../engine/indices/MLIndicesHandlerTest.java | 2 +- 22 files changed, 912 insertions(+), 635 deletions(-) rename {ml-algorithms/src/main/java/org/opensearch/ml/engine/indices => common/src/main/java/org/opensearch/ml/common}/MLIndex.java (56%) create mode 100644 common/src/main/resources/index-mappings/ml-agent.json create mode 100644 common/src/main/resources/index-mappings/ml-config.json create mode 100644 common/src/main/resources/index-mappings/ml-connector.json create mode 100644 common/src/main/resources/index-mappings/ml-controller.json create mode 100644 common/src/main/resources/index-mappings/ml-memory-message.json create mode 100644 common/src/main/resources/index-mappings/ml-memory-meta.json create mode 100644 common/src/main/resources/index-mappings/ml-model-group.json create mode 100644 common/src/main/resources/index-mappings/ml-model.json create mode 100644 common/src/main/resources/index-mappings/ml-task.json create mode 100644 common/src/test/resources/index-mappings/test-mapping-malformed.json create mode 100644 common/src/test/resources/index-mappings/test-mapping.json diff --git a/common/build.gradle b/common/build.gradle index 60edb3101a..979752bc05 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -26,6 +26,7 @@ dependencies { compileOnly group: 'org.apache.commons', name: 'commons-text', version: 
'1.10.0' compileOnly group: 'com.google.code.gson', name: 'gson', version: '2.10.1' compileOnly group: 'org.json', name: 'json', version: '20231013' + testImplementation group: 'org.json', name: 'json', version: '20231013' implementation('com.google.guava:guava:32.1.2-jre') { exclude group: 'com.google.guava', module: 'failureaccess' exclude group: 'com.google.code.findbugs', module: 'jsr305' diff --git a/common/src/main/java/org/opensearch/ml/common/CommonValue.java b/common/src/main/java/org/opensearch/ml/common/CommonValue.java index 3adaa8ca2e..e06e552536 100644 --- a/common/src/main/java/org/opensearch/ml/common/CommonValue.java +++ b/common/src/main/java/org/opensearch/ml/common/CommonValue.java @@ -5,39 +5,9 @@ package org.opensearch.ml.common; -import static org.opensearch.ml.common.MLConfig.CONFIG_TYPE_FIELD; -import static org.opensearch.ml.common.MLConfig.LAST_UPDATED_TIME_FIELD; -import static org.opensearch.ml.common.MLConfig.ML_CONFIGURATION_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.APPLICATION_TYPE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_ADDITIONAL_INFO_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_CONVERSATION_ID_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_CREATE_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_INDEX_SCHEMA_VERSION; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_INPUT_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_ORIGIN_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_PROMPT_TEMPLATE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_RESPONSE_FIELD; 
-import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_TRACE_NUMBER_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_CREATED_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_INDEX_SCHEMA_VERSION; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_NAME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_UPDATED_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.PARENT_INTERACTIONS_ID_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.USER_FIELD; -import static org.opensearch.ml.common.model.MLModelConfig.ALL_CONFIG_FIELD; -import static org.opensearch.ml.common.model.MLModelConfig.MODEL_TYPE_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.EMBEDDING_DIMENSION_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.FRAMEWORK_TYPE_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.MODEL_MAX_LENGTH_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.NORMALIZE_RESULT_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.POOLING_MODE_FIELD; - import java.util.Set; import org.opensearch.Version; -import org.opensearch.ml.common.agent.MLAgent; -import org.opensearch.ml.common.connector.AbstractConnector; -import org.opensearch.ml.common.controller.MLController; import com.google.common.collect.ImmutableSet; @@ -63,516 +33,27 @@ public class CommonValue { public static final String ML_MODEL_GROUP_INDEX = ".plugins-ml-model-group"; public static final String ML_MODEL_INDEX = ".plugins-ml-model"; public static final String ML_TASK_INDEX = ".plugins-ml-task"; - public static final Integer ML_MODEL_GROUP_INDEX_SCHEMA_VERSION = 2; - public static final Integer 
ML_MODEL_INDEX_SCHEMA_VERSION = 11; public static final String ML_CONNECTOR_INDEX = ".plugins-ml-connector"; - public static final Integer ML_TASK_INDEX_SCHEMA_VERSION = 3; - public static final Integer ML_CONNECTOR_SCHEMA_VERSION = 3; public static final String ML_CONFIG_INDEX = ".plugins-ml-config"; - public static final Integer ML_CONFIG_INDEX_SCHEMA_VERSION = 4; public static final String ML_CONTROLLER_INDEX = ".plugins-ml-controller"; - public static final Integer ML_CONTROLLER_INDEX_SCHEMA_VERSION = 1; public static final String ML_MAP_RESPONSE_KEY = "response"; public static final String ML_AGENT_INDEX = ".plugins-ml-agent"; - public static final Integer ML_AGENT_INDEX_SCHEMA_VERSION = 2; public static final String ML_MEMORY_META_INDEX = ".plugins-ml-memory-meta"; - public static final Integer ML_MEMORY_META_INDEX_SCHEMA_VERSION = 1; public static final String ML_MEMORY_MESSAGE_INDEX = ".plugins-ml-memory-message"; public static final String ML_STOP_WORDS_INDEX = ".plugins-ml-stop-words"; public static final Set stopWordsIndices = ImmutableSet.of(".plugins-ml-stop-words"); - public static final Integer ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION = 1; - public static final String USER_FIELD_MAPPING = " \"" - + CommonValue.USER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " }\n"; - public static final String ML_MODEL_GROUP_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + ML_MODEL_GROUP_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + 
MLModelGroup.MODEL_GROUP_NAME_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.DESCRIPTION_FIELD - + "\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"" - + MLModelGroup.LATEST_VERSION_FIELD - + "\": {\n" - + " \"type\": \"integer\"\n" - + " },\n" - + " \"" - + MLModelGroup.MODEL_GROUP_ID_FIELD - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.BACKEND_ROLES_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.ACCESS - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.OWNER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.CREATED_TIME_FIELD - + "\": {\n" - + " \"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModelGroup.LAST_UPDATED_TIME_FIELD - + "\": {\n" - + " \"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONNECTOR_INDEX_FIELDS = " \"properties\": {\n" - + " \"" - + AbstractConnector.NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + AbstractConnector.VERSION_FIELD - + "\" : {\"type\": 
\"keyword\"},\n" - + " \"" - + AbstractConnector.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + AbstractConnector.PROTOCOL_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + AbstractConnector.PARAMETERS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.CREDENTIAL_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.CLIENT_CONFIG_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.ACTIONS_FIELD - + "\" : {\"type\": \"flat_object\"}\n"; - - public static final String ML_MODEL_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_MODEL_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLModel.ALGORITHM_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + MLModel.OLD_MODEL_VERSION_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.MODEL_VERSION_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_GROUP_ID_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_FIELD - + "\" : {\"type\": \"binary\"},\n" - + " \"" - + MLModel.CHUNK_NUMBER_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.TOTAL_CHUNKS_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.MODEL_ID_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + MLModel.MODEL_FORMAT_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_STATE_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_SIZE_IN_BYTES_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.PLANNING_WORKER_NODE_COUNT_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.CURRENT_WORKER_NODE_COUNT_FIELD - + "\" : {\"type\": \"integer\"},\n" - 
+ " \"" - + MLModel.PLANNING_WORKER_NODES_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.DEPLOY_TO_ALL_NODES_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.IS_HIDDEN_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.MODEL_CONFIG_FIELD - + "\" : {\"properties\":{\"" - + MODEL_TYPE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + EMBEDDING_DIMENSION_FIELD - + "\":{\"type\":\"integer\"},\"" - + FRAMEWORK_TYPE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + POOLING_MODE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + NORMALIZE_RESULT_FIELD - + "\":{\"type\":\"boolean\"},\"" - + MODEL_MAX_LENGTH_FIELD - + "\":{\"type\":\"integer\"},\"" - + ALL_CONFIG_FIELD - + "\":{\"type\":\"text\"}}},\n" - + " \"" - + MLModel.DEPLOY_SETTING_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.IS_ENABLED_FIELD - + "\" : {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.IS_CONTROLLER_ENABLED_FIELD - + "\" : {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.RATE_LIMITER_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_HASH_VALUE_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.AUTO_REDEPLOY_RETRY_TIMES_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_REGISTERED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_DEPLOYED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_UNDEPLOYED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.INTERFACE_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + 
MLModel.GUARDRAILS_FIELD - + "\" : {\n" - + " \"properties\": {\n" - + " \"input_guardrail\": {\n" - + " \"properties\": {\n" - + " \"regex\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"stop_words\": {\n" - + " \"properties\": {\n" - + " \"index_name\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"source_fields\": {\n" - + " \"type\": \"text\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " },\n" - + " \"output_guardrail\": {\n" - + " \"properties\": {\n" - + " \"regex\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"stop_words\": {\n" - + " \"properties\": {\n" - + " \"index_name\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"source_fields\": {\n" - + " \"type\": \"text\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModel.CONNECTOR_FIELD - + "\": {" - + ML_CONNECTOR_INDEX_FIELDS - + " }\n}," - + USER_FIELD_MAPPING - + " }\n" - + "}"; - - public static final String ML_TASK_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_TASK_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLTask.MODEL_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.TASK_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.FUNCTION_NAME_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.STATE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.INPUT_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.PROGRESS_FIELD - + "\": {\"type\": \"float\"},\n" - + " \"" - + MLTask.OUTPUT_INDEX_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.WORKER_NODE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLTask.LAST_UPDATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLTask.ERROR_FIELD - + "\": 
{\"type\": \"text\"},\n" - + " \"" - + MLTask.IS_ASYNC_TASK_FIELD - + "\" : {\"type\" : \"boolean\"}, \n" - + " \"" - + MLTask.REMOTE_JOB_FIELD - + "\" : {\"type\": \"flat_object\"}, \n" - + USER_FIELD_MAPPING - + " }\n" - + "}"; - - public static final String ML_CONNECTOR_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONNECTOR_SCHEMA_VERSION - + "},\n" - + ML_CONNECTOR_INDEX_FIELDS - + ",\n" - + " \"" - + MLModelGroup.BACKEND_ROLES_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.ACCESS - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.OWNER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " },\n" - + " \"" - + AbstractConnector.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + AbstractConnector.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONFIG_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONFIG_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MASTER_KEY - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + CONFIG_TYPE_FIELD - + "\" : {\"type\":\"keyword\"},\n" - + " \"" - + ML_CONFIGURATION_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + CREATE_TIME_FIELD - + "\": 
{\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONTROLLER_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONTROLLER_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLController.USER_RATE_LIMITER - + "\" : {\"type\": \"flat_object\"}\n" - + " }\n" - + "}"; - - public static final String ML_AGENT_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_AGENT_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLAgent.AGENT_NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + MLAgent.AGENT_TYPE_FIELD - + "\" : {\"type\":\"keyword\"},\n" - + " \"" - + MLAgent.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + MLAgent.LLM_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.TOOLS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.PARAMETERS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.MEMORY_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.IS_HIDDEN_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLAgent.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLAgent.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - public static final String ML_MEMORY_META_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + META_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + META_NAME_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + META_CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" 
- + " \"" - + META_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + USER_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + APPLICATION_TYPE_FIELD - + "\": {\"type\": \"keyword\"}\n" - + " }\n" - + "}"; + // Index mapping paths + public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml-model-group.json"; + public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml-model.json"; + public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml-task.json"; + public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml-connector.json"; + public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml-config.json"; + public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml-controller.json"; + public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml-agent.json"; + public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml-memory-meta.json"; + public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml-memory-message.json"; - public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + INTERACTIONS_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + INTERACTIONS_CONVERSATION_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + INTERACTIONS_INPUT_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_PROMPT_TEMPLATE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_RESPONSE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_ORIGIN_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + 
" \"" - + PARENT_INTERACTIONS_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_TRACE_NUMBER_FIELD - + "\": {\"type\": \"long\"}\n" - + " }\n" - + "}"; // Calculate Versions independently of OpenSearch core version public static final Version VERSION_2_11_0 = Version.fromString("2.11.0"); public static final Version VERSION_2_12_0 = Version.fromString("2.12.0"); diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java b/common/src/main/java/org/opensearch/ml/common/MLIndex.java similarity index 56% rename from ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java rename to common/src/main/java/org/opensearch/ml/common/MLIndex.java index 0cc329f1ac..c497452c6b 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java +++ b/common/src/main/java/org/opensearch/ml/common/MLIndex.java @@ -3,46 +3,42 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.ml.engine.indices; +package org.opensearch.ml.common; import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING_PATH; import static 
org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING_PATH; + +import java.io.IOException; +import java.io.UncheckedIOException; + 
+import org.opensearch.ml.common.utils.IndexUtils; public enum MLIndex { - MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING, ML_MODEL_GROUP_INDEX_SCHEMA_VERSION), - MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING, ML_MODEL_INDEX_SCHEMA_VERSION), - TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING, ML_TASK_INDEX_SCHEMA_VERSION), - CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING, ML_CONNECTOR_SCHEMA_VERSION), - CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING, ML_CONFIG_INDEX_SCHEMA_VERSION), - CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING, ML_CONTROLLER_INDEX_SCHEMA_VERSION), - AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING, ML_AGENT_INDEX_SCHEMA_VERSION), - MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING, ML_MEMORY_META_INDEX_SCHEMA_VERSION), - MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING, ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION); + MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING_PATH), + MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING_PATH), + TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING_PATH), + CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING_PATH), + CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING_PATH), + CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING_PATH), + AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING_PATH), + MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING_PATH), + MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH); private final String indexName; // whether we use an alias for the index @@ -50,11 +46,24 @@ public enum MLIndex { private final String mapping; private final Integer version; - MLIndex(String name, boolean alias, String mapping, Integer version) { + MLIndex(String name, boolean alias, String mappingPath) { this.indexName = name; this.alias = alias; - this.mapping = mapping; - 
this.version = version; + this.mapping = getMapping(mappingPath); + this.version = IndexUtils.getVersionFromMapping(this.mapping); + } + + private String getMapping(String mappingPath) { + if (mappingPath == null) { + throw new IllegalArgumentException("Mapping path cannot be null"); + } + + try { + return IndexUtils.getMappingFromFile(mappingPath); + } catch (IOException e) { + // Unchecked exception is thrown since the method is being called within a constructor + throw new UncheckedIOException("Failed to fetch index mapping from file: " + mappingPath, e); + } } public String getIndexName() { diff --git a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java index ac639babb2..88f4920761 100644 --- a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java +++ b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java @@ -18,15 +18,15 @@ package org.opensearch.ml.common.conversation; import org.opensearch.common.settings.Setting; +import org.opensearch.ml.common.MLIndex; /** * Class containing a bunch of constant defining how the conversational indices are formatted + * ToDo: use MLIndex.MEMORY_MESSAGE and MLIndex.MEMORY_META directly for index names and mappings rather than constants */ public class ConversationalIndexConstants { - /** Version of the meta index schema */ - public final static Integer META_INDEX_SCHEMA_VERSION = 2; /** Name of the conversational metadata index */ - public final static String META_INDEX_NAME = ".plugins-ml-memory-meta"; + public final static String META_INDEX_NAME = MLIndex.MEMORY_META.getIndexName(); /** Name of the metadata field for initial timestamp */ public final static String META_CREATED_TIME_FIELD = "create_time"; /** Name of the metadata field for updated timestamp */ @@ -41,38 +41,10 @@ public class 
ConversationalIndexConstants { public final static String META_ADDITIONAL_INFO_FIELD = "additional_info"; /** Mappings for the conversational metadata index */ - public final static String META_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + META_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + META_NAME_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + META_CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + META_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + USER_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + APPLICATION_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + META_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"}\n" - + " }\n" - + "}"; + public final static String META_MAPPING = MLIndex.MEMORY_META.getMapping(); - /** Version of the interactions index schema */ - public final static Integer INTERACTIONS_INDEX_SCHEMA_VERSION = 1; /** Name of the conversational interactions index */ - public final static String INTERACTIONS_INDEX_NAME = ".plugins-ml-memory-message"; + public final static String INTERACTIONS_INDEX_NAME = MLIndex.MEMORY_MESSAGE.getIndexName(); /** Name of the interaction field for the conversation Id */ public final static String INTERACTIONS_CONVERSATION_ID_FIELD = "memory_id"; /** Name of the interaction field for the human input */ @@ -92,42 +64,7 @@ public class ConversationalIndexConstants { /** The trace number of an interaction */ public final static String INTERACTIONS_TRACE_NUMBER_FIELD = "trace_number"; /** Mappings for the interactions index */ - public final static String INTERACTIONS_MAPPINGS = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + INTERACTIONS_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + INTERACTIONS_CONVERSATION_ID_FIELD - + "\": {\"type\": 
\"keyword\"},\n" - + " \"" - + INTERACTIONS_CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + INTERACTIONS_INPUT_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_PROMPT_TEMPLATE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_RESPONSE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_ORIGIN_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + PARENT_INTERACTIONS_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_TRACE_NUMBER_FIELD - + "\": {\"type\": \"long\"}\n" - + " }\n" - + "}"; + public final static String INTERACTIONS_MAPPINGS = MLIndex.MEMORY_MESSAGE.getMapping(); /** Feature Flag setting for conversational memory */ public static final Setting ML_COMMONS_MEMORY_FEATURE_ENABLED = Setting diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 298bd3ec96..92ccb07bf9 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -5,8 +5,15 @@ package org.opensearch.ml.common.utils; +import java.io.IOException; +import java.net.URL; import java.util.Map; +import com.google.common.base.Charsets; +import com.google.common.io.Resources; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; + import lombok.extern.log4j.Log4j2; @Log4j2 @@ -32,4 +39,40 @@ public class IndexUtils { // Note: This does not include static settings like number of shards, which can't be changed after index creation. 
public static final Map UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1"); public static final Map UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all"); + + public static String getMappingFromFile(String path) throws IOException { + URL url = IndexUtils.class.getClassLoader().getResource(path); + if (url == null) { + throw new IOException("Resource not found: " + path); + } + + String mapping = Resources.toString(url, Charsets.UTF_8).trim(); + if (mapping.isEmpty() || !StringUtils.isJson(mapping)) { + throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path); + } + + return mapping; + } + + public static Integer getVersionFromMapping(String mapping) { + if (mapping == null || mapping.isBlank()) { + throw new IllegalArgumentException("Mapping cannot be null or empty"); + } + + JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping); + if (mappingJson == null || !mappingJson.has("_meta")) { + throw new JsonParseException("Failed to find \"_meta\" object in mapping: " + mapping); + } + + JsonObject metaObject = mappingJson.getAsJsonObject("_meta"); + if (metaObject == null || !metaObject.has("schema_version")) { + throw new JsonParseException("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping); + } + + try { + return metaObject.get("schema_version").getAsInt(); + } catch (NumberFormatException | ClassCastException e) { + throw new JsonParseException("Invalid \"schema_version\" value in mapping: " + mapping, e); + } + } } diff --git a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java index 37bfac6f3f..fcc0c4c3c9 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java @@ -26,6 +26,7 @@ import com.google.gson.Gson; import com.google.gson.JsonElement; +import 
com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.google.gson.JsonSyntaxException; import com.jayway.jsonpath.JsonPath; @@ -53,12 +54,16 @@ public class StringUtils { } public static final String TO_STRING_FUNCTION_NAME = ".toString()"; - public static boolean isValidJsonString(String Json) { + public static boolean isValidJsonString(String json) { + if (json == null || json.isBlank()) { + return false; + } + try { - new JSONObject(Json); + new JSONObject(json); } catch (JSONException ex) { try { - new JSONArray(Json); + new JSONArray(json); } catch (JSONException ex1) { return false; } @@ -67,6 +72,10 @@ public static boolean isValidJsonString(String Json) { } public static boolean isJson(String json) { + if (json == null || json.isBlank()) { + return false; + } + try { if (!isValidJsonString(json)) { return false; @@ -319,4 +328,12 @@ public static boolean isValidJSONPath(String input) { } } + public static JsonObject getJsonObjectFromString(String jsonString) { + if (jsonString == null || jsonString.isBlank()) { + throw new IllegalArgumentException("Json cannot be null or empty"); + } + + return JsonParser.parseString(jsonString).getAsJsonObject(); + } + } diff --git a/common/src/main/resources/index-mappings/ml-agent.json b/common/src/main/resources/index-mappings/ml-agent.json new file mode 100644 index 0000000000..2bcee6bc3b --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-agent.json @@ -0,0 +1,45 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "type": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "llm": { + "type": "flat_object" + }, + "tools": { + "type": "flat_object" + }, + "parameters": { + "type": "flat_object" + }, + "memory": { + "type": "flat_object" + }, + "is_hidden": { + "type": "boolean" + }, + "created_time": { + "type": "date", + "format": 
"strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-config.json b/common/src/main/resources/index-mappings/ml-config.json new file mode 100644 index 0000000000..6d36d8efb7 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-config.json @@ -0,0 +1,24 @@ +{ + "_meta": { + "schema_version": 4 + }, + "properties": { + "master_key": { + "type": "keyword" + }, + "config_type": { + "type": "keyword" + }, + "ml_configuration": { + "type": "flat_object" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-connector.json b/common/src/main/resources/index-mappings/ml-connector.json new file mode 100644 index 0000000000..4be168c4b9 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-connector.json @@ -0,0 +1,95 @@ +{ + "_meta": { + "schema_version": 3 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "protocol": { + "type": "keyword" + }, + "parameters": { + "type": "flat_object" + }, + "credential": { + "type": "flat_object" + }, + "client_config": { + "type": "flat_object" + }, + "actions": { + "type": "flat_object" + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "access": { + "type": "keyword" + }, + "owner": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": 
{ + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-controller.json b/common/src/main/resources/index-mappings/ml-controller.json new file mode 100644 index 0000000000..6822fb19c5 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-controller.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "schema_version": 1 + }, + "properties": { + "user_rate_limiter": { + "type": "flat_object" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-memory-message.json b/common/src/main/resources/index-mappings/ml-memory-message.json new file mode 100644 index 0000000000..10b081aee1 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-memory-message.json @@ -0,0 +1,35 @@ +{ + "_meta": { + "schema_version": 1 + }, + "properties": { + "memory_id": { + "type": "keyword" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "input": { + "type": "text" + }, + "prompt_template": { + "type": "text" + }, + "response": { + "type": "text" + }, + "origin": { + "type": "keyword" + }, + "additional_info": { + "type": "flat_object" + }, + "parent_message_id": { + "type": "keyword" + }, + "trace_number": { + "type": "long" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-memory-meta.json b/common/src/main/resources/index-mappings/ml-memory-meta.json new file mode 100644 index 0000000000..7684e25d06 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-memory-meta.json @@ -0,0 +1,27 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text" + }, + "create_time": { + "type": "date", + 
"format": "strict_date_time||epoch_millis" + }, + "updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "user": { + "type": "keyword" + }, + "application_type": { + "type": "keyword" + }, + "additional_info": { + "type": "flat_object" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-model-group.json b/common/src/main/resources/index-mappings/ml-model-group.json new file mode 100644 index 0000000000..7e2437e534 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-model-group.json @@ -0,0 +1,83 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "description": { + "type": "text" + }, + "latest_version": { + "type": "integer" + }, + "model_group_id": { + "type": "keyword" + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "access": { + "type": "keyword" + }, + "owner": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-model.json b/common/src/main/resources/index-mappings/ml-model.json new file mode 100644 index 0000000000..b996e463cd --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-model.json @@ -0,0 +1,243 @@ +{ + "_meta": { + 
"schema_version": 11 + }, + "properties": { + "algorithm": { + "type": "keyword" + }, + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "long" + }, + "model_version": { + "type": "keyword" + }, + "model_group_id": { + "type": "keyword" + }, + "model_content": { + "type": "binary" + }, + "chunk_number": { + "type": "long" + }, + "total_chunks": { + "type": "long" + }, + "model_id": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "model_format": { + "type": "keyword" + }, + "model_state": { + "type": "keyword" + }, + "model_content_size_in_bytes": { + "type": "long" + }, + "planning_worker_node_count": { + "type": "integer" + }, + "current_worker_node_count": { + "type": "integer" + }, + "planning_worker_nodes": { + "type": "keyword" + }, + "deploy_to_all_nodes": { + "type": "boolean" + }, + "is_hidden": { + "type": "boolean" + }, + "model_config": { + "properties": { + "model_type": { + "type": "keyword" + }, + "embedding_dimension": { + "type": "integer" + }, + "framework_type": { + "type": "keyword" + }, + "pooling_mode": { + "type": "keyword" + }, + "normalize_result": { + "type": "boolean" + }, + "model_max_length": { + "type": "integer" + }, + "all_config": { + "type": "text" + } + } + }, + "deploy_setting": { + "type": "flat_object" + }, + "is_enabled": { + "type": "boolean" + }, + "is_controller_enabled": { + "type": "boolean" + }, + "rate_limiter": { + "type": "flat_object" + }, + "model_content_hash_value": { + "type": "keyword" + }, + "auto_redeploy_retry_times": { + "type": "integer" + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_registered_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_deployed_time": { + "type": "date", + "format": 
"strict_date_time||epoch_millis" + }, + "last_undeployed_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "interface": { + "type": "flat_object" + }, + "guardrails": { + "properties": { + "input_guardrail": { + "properties": { + "regex": { + "type": "text" + }, + "stop_words": { + "properties": { + "index_name": { + "type": "text" + }, + "source_fields": { + "type": "text" + } + } + } + } + }, + "output_guardrail": { + "properties": { + "regex": { + "type": "text" + }, + "stop_words": { + "properties": { + "index_name": { + "type": "text" + }, + "source_fields": { + "type": "text" + } + } + } + } + } + } + }, + "connector": { + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "protocol": { + "type": "keyword" + }, + "parameters": { + "type": "flat_object" + }, + "credential": { + "type": "flat_object" + }, + "client_config": { + "type": "flat_object" + }, + "actions": { + "type": "flat_object" + } + } + }, + "user": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-task.json b/common/src/main/resources/index-mappings/ml-task.json new file mode 100644 index 0000000000..ad428724bf --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-task.json @@ -0,0 +1,86 @@ +{ + "_meta": { + "schema_version": 3 + }, + "properties": { + "model_id": { + "type": "keyword" + }, + "task_type": { + "type": 
"keyword" + }, + "function_name": { + "type": "keyword" + }, + "state": { + "type": "keyword" + }, + "input_type": { + "type": "keyword" + }, + "progress": { + "type": "float" + }, + "output_index": { + "type": "keyword" + }, + "worker_node": { + "type": "keyword" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_update_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "error": { + "type": "text" + }, + "is_async": { + "type": "boolean" + }, + "remote_job": { + "type": "flat_object" + }, + "user": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + } + } +} diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index 8cfad37c98..a4b3badacf 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -6,11 +6,15 @@ package org.opensearch.ml.common.utils; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import java.io.IOException; import java.util.Map; import org.junit.Test; +import com.google.gson.JsonParseException; + public class IndexUtilsTest { @Test @@ -42,4 +46,110 @@ public void testUpdatedAllNodesReplicaIndexSettingsContainsExpectedValues() { assertEquals("index.auto_expand_replicas should be 0-all", updatedIndexSettings.get("index.auto_expand_replicas"), "0-all"); assertEquals("INDEX_SETTINGS should contain exactly 1 settings", 
1, updatedIndexSettings.size()); } + + @Test + public void testGetMappingFromFile() { + String expectedMapping = "{\n" + + " \"_meta\": {\n" + + " \"schema_version\": \"1\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + try { + String actualMapping = IndexUtils.getMappingFromFile("index-mappings/test-mapping.json"); + // comparing JsonObjects to avoid issues caused by eol character in different OS + assertEquals(StringUtils.getJsonObjectFromString(expectedMapping), StringUtils.getJsonObjectFromString(actualMapping)); + } catch (IOException e) { + throw new RuntimeException("Failed to read file at path: index-mappings/test-mapping.json"); + } + } + + @Test + public void testGetMappingFromFileFileNotFound() { + String path = "index-mappings/test-mapping-not-found.json"; + IOException e = assertThrows(IOException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Resource not found: " + path, e.getMessage()); + } + + @Test + public void testGetMappingFromFilesMalformedJson() { + String path = "index-mappings/test-mapping-malformed.json"; + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Invalid or non-JSON mapping at: " + path, e.getMessage()); + } + + @Test + public void testGetVersionFromMapping() { + Integer expectedVersion = 1; + String mapping = "{\n" + + " \"_meta\": {\n" + + " \"schema_version\": \"1\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + 
assertEquals(expectedVersion, IndexUtils.getVersionFromMapping(mapping)); + } + + @Test + public void testGetVersionFromMappingNoMeta() { + String mapping = "{\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); + assertEquals("Failed to find \"_meta\" object in mapping: " + mapping, e.getMessage()); + } + + @Test + public void testGetVersionFromMappingNoSchemaVersion() { + String mapping = "{\n" + + " \"_meta\": {\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); + assertEquals("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping, e.getMessage()); + } } diff --git a/common/src/test/resources/index-mappings/test-mapping-malformed.json b/common/src/test/resources/index-mappings/test-mapping-malformed.json new file mode 100644 index 0000000000..f87e98da9b --- /dev/null +++ b/common/src/test/resources/index-mappings/test-mapping-malformed.json @@ -0,0 +1,13 @@ +{ + "_meta": { + "schema_version": "1" + }, + "properties": { + "test_field_1": { + "type": "test_type_1" + }, + { + "malformed": } + } + } +} diff --git a/common/src/test/resources/index-mappings/test-mapping.json b/common/src/test/resources/index-mappings/test-mapping.json new file mode 100644 index 0000000000..6114de4687 --- /dev/null +++ 
b/common/src/test/resources/index-mappings/test-mapping.json @@ -0,0 +1,16 @@ +{ + "_meta": { + "schema_version": "1" + }, + "properties": { + "test_field_1": { + "type": "test_type_1" + }, + "test_field_2": { + "type": "test_type_2" + }, + "test_field_3": { + "type": "test_type_3" + } + } +} diff --git a/memory/build.gradle b/memory/build.gradle index b6198509d0..8251303158 100644 --- a/memory/build.gradle +++ b/memory/build.gradle @@ -37,6 +37,7 @@ dependencies { testImplementation "org.opensearch.test:framework:${opensearch_version}" testImplementation "org.opensearch.client:opensearch-rest-client:${opensearch_version}" testImplementation group: 'com.google.code.gson', name: 'gson', version: '2.10.1' + testImplementation group: 'org.json', name: 'json', version: '20231013' } test { diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java index e6a15ecdae..9efe9372b8 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java +++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java @@ -8,7 +8,6 @@ import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING; import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX; import static org.opensearch.ml.common.MLModel.MODEL_STATE_FIELD; @@ -40,6 +39,7 @@ import org.opensearch.ml.common.AccessMode; import org.opensearch.ml.common.CommonValue; import org.opensearch.ml.common.FunctionName; +import org.opensearch.ml.common.MLIndex; import org.opensearch.ml.common.MLModel; import 
org.opensearch.ml.common.MLModelGroup; import org.opensearch.ml.common.MLTask; @@ -131,7 +131,7 @@ public void execute(Input input, ActionListener Date: Tue, 3 Dec 2024 17:25:18 +0800 Subject: [PATCH 7/7] Retrieve remote model id from registration response in IT to avoid flaky (#3244) Signed-off-by: zane-neo --- .../java/org/opensearch/ml/rest/MLCommonsRestTestCase.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/plugin/src/test/java/org/opensearch/ml/rest/MLCommonsRestTestCase.java b/plugin/src/test/java/org/opensearch/ml/rest/MLCommonsRestTestCase.java index 2092b9f4b4..6f710ea1de 100644 --- a/plugin/src/test/java/org/opensearch/ml/rest/MLCommonsRestTestCase.java +++ b/plugin/src/test/java/org/opensearch/ml/rest/MLCommonsRestTestCase.java @@ -1002,15 +1002,11 @@ public String registerRemoteModel(String createConnectorInput, String modelName, String connectorId = (String) responseMap.get("connector_id"); response = RestMLRemoteInferenceIT.registerRemoteModel(modelName, modelName, connectorId); responseMap = parseResponseToMap(response); - String taskId = (String) responseMap.get("task_id"); - waitForTask(taskId, MLTaskState.COMPLETED); - response = RestMLRemoteInferenceIT.getTask(taskId); - responseMap = parseResponseToMap(response); String modelId = (String) responseMap.get("model_id"); if (deploy) { response = RestMLRemoteInferenceIT.deployRemoteModel(modelId); responseMap = parseResponseToMap(response); - taskId = (String) responseMap.get("task_id"); + String taskId = (String) responseMap.get("task_id"); waitForTask(taskId, MLTaskState.COMPLETED); } return modelId;