diff --git a/.github/workflows/CI-workflow.yml b/.github/workflows/CI-workflow.yml index 14632e46db..7c38b23eab 100644 --- a/.github/workflows/CI-workflow.yml +++ b/.github/workflows/CI-workflow.yml @@ -27,8 +27,6 @@ jobs: strategy: matrix: java: [21] - env: - ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true name: Build and Test MLCommons Plugin on linux if: github.repository == 'opensearch-project/ml-commons' @@ -41,21 +39,23 @@ jobs: # this image tag is subject to change as more dependencies and updates will arrive over time image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} # need to switch to root so that github actions can install runner binary on container without permission issues. - options: --user root + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} steps: + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} - name: Setup Java ${{ matrix.java }} uses: actions/setup-java@v1 with: java-version: ${{ matrix.java }} - - uses: aws-actions/configure-aws-credentials@v2 + - uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ secrets.ML_ROLE }} aws-region: us-west-2 - name: Checkout MLCommons - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} @@ -77,7 +77,7 @@ jobs: echo "build-test-linux=$plugin" >> $GITHUB_OUTPUT - name: Upload Coverage Report - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v4 with: flags: ml-commons token: ${{ secrets.CODECOV_TOKEN }} @@ -101,7 +101,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: aws-actions/configure-aws-credentials@v2 + - uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ secrets.ML_ROLE }} aws-region: us-west-2 @@ -112,11 +112,11 @@ jobs: java-version: ${{ matrix.java }} - name: Checkout MLCommons - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: 
actions/download-artifact@v4.1.7 + - uses: actions/download-artifact@v4 with: name: ml-plugin-linux-${{ matrix.java }} @@ -181,7 +181,7 @@ jobs: fi - name: Upload Coverage Report - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: flags: ml-commons token: ${{ secrets.CODECOV_TOKEN }} @@ -202,14 +202,14 @@ jobs: with: java-version: ${{ matrix.java }} - - uses: aws-actions/configure-aws-credentials@v2 + - uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ secrets.ML_ROLE }} aws-region: us-west-2 # ml-commons - name: Checkout MLCommons - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ github.event.pull_request.head.sha }} diff --git a/common/build.gradle b/common/build.gradle index 60edb3101a..979752bc05 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -26,6 +26,7 @@ dependencies { compileOnly group: 'org.apache.commons', name: 'commons-text', version: '1.10.0' compileOnly group: 'com.google.code.gson', name: 'gson', version: '2.10.1' compileOnly group: 'org.json', name: 'json', version: '20231013' + testImplementation group: 'org.json', name: 'json', version: '20231013' implementation('com.google.guava:guava:32.1.2-jre') { exclude group: 'com.google.guava', module: 'failureaccess' exclude group: 'com.google.code.findbugs', module: 'jsr305' diff --git a/common/src/main/java/org/opensearch/ml/common/CommonValue.java b/common/src/main/java/org/opensearch/ml/common/CommonValue.java index 60852dcb55..be68a8d42f 100644 --- a/common/src/main/java/org/opensearch/ml/common/CommonValue.java +++ b/common/src/main/java/org/opensearch/ml/common/CommonValue.java @@ -5,39 +5,9 @@ package org.opensearch.ml.common; -import static org.opensearch.ml.common.MLConfig.CONFIG_TYPE_FIELD; -import static org.opensearch.ml.common.MLConfig.LAST_UPDATED_TIME_FIELD; -import static org.opensearch.ml.common.MLConfig.ML_CONFIGURATION_FIELD; -import static 
org.opensearch.ml.common.conversation.ConversationalIndexConstants.APPLICATION_TYPE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_ADDITIONAL_INFO_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_CONVERSATION_ID_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_CREATE_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_INDEX_SCHEMA_VERSION; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_INPUT_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_ORIGIN_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_PROMPT_TEMPLATE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_RESPONSE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_TRACE_NUMBER_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_CREATED_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_INDEX_SCHEMA_VERSION; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_NAME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_UPDATED_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.PARENT_INTERACTIONS_ID_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.USER_FIELD; -import static org.opensearch.ml.common.model.MLModelConfig.ALL_CONFIG_FIELD; -import static org.opensearch.ml.common.model.MLModelConfig.MODEL_TYPE_FIELD; -import static 
org.opensearch.ml.common.model.TextEmbeddingModelConfig.EMBEDDING_DIMENSION_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.FRAMEWORK_TYPE_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.MODEL_MAX_LENGTH_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.NORMALIZE_RESULT_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.POOLING_MODE_FIELD; - import java.util.Set; import org.opensearch.Version; -import org.opensearch.ml.common.agent.MLAgent; -import org.opensearch.ml.common.connector.AbstractConnector; -import org.opensearch.ml.common.controller.MLController; import com.google.common.collect.ImmutableSet; @@ -63,517 +33,28 @@ public class CommonValue { public static final String ML_MODEL_GROUP_INDEX = ".plugins-ml-model-group"; public static final String ML_MODEL_INDEX = ".plugins-ml-model"; public static final String ML_TASK_INDEX = ".plugins-ml-task"; - public static final Integer ML_MODEL_GROUP_INDEX_SCHEMA_VERSION = 2; - public static final Integer ML_MODEL_INDEX_SCHEMA_VERSION = 11; public static final String ML_CONNECTOR_INDEX = ".plugins-ml-connector"; - public static final Integer ML_TASK_INDEX_SCHEMA_VERSION = 3; - public static final Integer ML_CONNECTOR_SCHEMA_VERSION = 3; public static final String ML_CONFIG_INDEX = ".plugins-ml-config"; - public static final Integer ML_CONFIG_INDEX_SCHEMA_VERSION = 4; public static final String ML_CONTROLLER_INDEX = ".plugins-ml-controller"; - public static final Integer ML_CONTROLLER_INDEX_SCHEMA_VERSION = 1; public static final String ML_MAP_RESPONSE_KEY = "response"; public static final String ML_AGENT_INDEX = ".plugins-ml-agent"; - public static final Integer ML_AGENT_INDEX_SCHEMA_VERSION = 2; public static final String ML_MEMORY_META_INDEX = ".plugins-ml-memory-meta"; - public static final Integer ML_MEMORY_META_INDEX_SCHEMA_VERSION = 1; public static final String ML_MEMORY_MESSAGE_INDEX = 
".plugins-ml-memory-message"; public static final String ML_STOP_WORDS_INDEX = ".plugins-ml-stop-words"; public static final Set stopWordsIndices = ImmutableSet.of(".plugins-ml-stop-words"); - public static final Integer ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION = 1; public static final String TOOL_MODEL_RELATED_FIELD_PREFIX = "tools.parameters."; - public static final String USER_FIELD_MAPPING = " \"" - + CommonValue.USER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " }\n"; - public static final String ML_MODEL_GROUP_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + ML_MODEL_GROUP_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + MLModelGroup.MODEL_GROUP_NAME_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.DESCRIPTION_FIELD - + "\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"" - + MLModelGroup.LATEST_VERSION_FIELD - + "\": {\n" - + " \"type\": \"integer\"\n" - + " },\n" - + " \"" - + MLModelGroup.MODEL_GROUP_ID_FIELD - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.BACKEND_ROLES_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.ACCESS - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" 
- + " \"" - + MLModelGroup.OWNER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.CREATED_TIME_FIELD - + "\": {\n" - + " \"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModelGroup.LAST_UPDATED_TIME_FIELD - + "\": {\n" - + " \"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONNECTOR_INDEX_FIELDS = " \"properties\": {\n" - + " \"" - + AbstractConnector.NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + AbstractConnector.VERSION_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + AbstractConnector.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + AbstractConnector.PROTOCOL_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + AbstractConnector.PARAMETERS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.CREDENTIAL_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.CLIENT_CONFIG_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.ACTIONS_FIELD - + "\" : {\"type\": \"flat_object\"}\n"; - - public static final String ML_MODEL_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_MODEL_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLModel.ALGORITHM_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_NAME_FIELD - + "\" : 
{\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + MLModel.OLD_MODEL_VERSION_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.MODEL_VERSION_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_GROUP_ID_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_FIELD - + "\" : {\"type\": \"binary\"},\n" - + " \"" - + MLModel.CHUNK_NUMBER_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.TOTAL_CHUNKS_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.MODEL_ID_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + MLModel.MODEL_FORMAT_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_STATE_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_SIZE_IN_BYTES_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.PLANNING_WORKER_NODE_COUNT_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.CURRENT_WORKER_NODE_COUNT_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.PLANNING_WORKER_NODES_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.DEPLOY_TO_ALL_NODES_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.IS_HIDDEN_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.MODEL_CONFIG_FIELD - + "\" : {\"properties\":{\"" - + MODEL_TYPE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + EMBEDDING_DIMENSION_FIELD - + "\":{\"type\":\"integer\"},\"" - + FRAMEWORK_TYPE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + POOLING_MODE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + NORMALIZE_RESULT_FIELD - + "\":{\"type\":\"boolean\"},\"" - + MODEL_MAX_LENGTH_FIELD - + "\":{\"type\":\"integer\"},\"" - + ALL_CONFIG_FIELD - + "\":{\"type\":\"text\"}}},\n" - + " \"" - + MLModel.DEPLOY_SETTING_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + 
MLModel.IS_ENABLED_FIELD - + "\" : {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.IS_CONTROLLER_ENABLED_FIELD - + "\" : {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.RATE_LIMITER_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_HASH_VALUE_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.AUTO_REDEPLOY_RETRY_TIMES_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_REGISTERED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_DEPLOYED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_UNDEPLOYED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.INTERFACE_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.GUARDRAILS_FIELD - + "\" : {\n" - + " \"properties\": {\n" - + " \"input_guardrail\": {\n" - + " \"properties\": {\n" - + " \"regex\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"stop_words\": {\n" - + " \"properties\": {\n" - + " \"index_name\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"source_fields\": {\n" - + " \"type\": \"text\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " },\n" - + " \"output_guardrail\": {\n" - + " \"properties\": {\n" - + " \"regex\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"stop_words\": {\n" - + " \"properties\": {\n" - + " \"index_name\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"source_fields\": {\n" - + " \"type\": \"text\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModel.CONNECTOR_FIELD - + 
"\": {" - + ML_CONNECTOR_INDEX_FIELDS - + " }\n}," - + USER_FIELD_MAPPING - + " }\n" - + "}"; - - public static final String ML_TASK_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_TASK_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLTask.MODEL_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.TASK_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.FUNCTION_NAME_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.STATE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.INPUT_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.PROGRESS_FIELD - + "\": {\"type\": \"float\"},\n" - + " \"" - + MLTask.OUTPUT_INDEX_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.WORKER_NODE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLTask.LAST_UPDATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLTask.ERROR_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + MLTask.IS_ASYNC_TASK_FIELD - + "\" : {\"type\" : \"boolean\"}, \n" - + " \"" - + MLTask.REMOTE_JOB_FIELD - + "\" : {\"type\": \"flat_object\"}, \n" - + USER_FIELD_MAPPING - + " }\n" - + "}"; - - public static final String ML_CONNECTOR_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONNECTOR_SCHEMA_VERSION - + "},\n" - + ML_CONNECTOR_INDEX_FIELDS - + ",\n" - + " \"" - + MLModelGroup.BACKEND_ROLES_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.ACCESS - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.OWNER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - 
+ " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " },\n" - + " \"" - + AbstractConnector.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + AbstractConnector.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONFIG_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONFIG_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MASTER_KEY - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + CONFIG_TYPE_FIELD - + "\" : {\"type\":\"keyword\"},\n" - + " \"" - + ML_CONFIGURATION_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONTROLLER_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONTROLLER_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLController.USER_RATE_LIMITER - + "\" : {\"type\": \"flat_object\"}\n" - + " }\n" - + "}"; - - public static final String ML_AGENT_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_AGENT_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLAgent.AGENT_NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + MLAgent.AGENT_TYPE_FIELD 
- + "\" : {\"type\":\"keyword\"},\n" - + " \"" - + MLAgent.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + MLAgent.LLM_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.TOOLS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.PARAMETERS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.MEMORY_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.IS_HIDDEN_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLAgent.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLAgent.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - public static final String ML_MEMORY_META_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + META_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + META_NAME_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + META_CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + META_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + USER_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + APPLICATION_TYPE_FIELD - + "\": {\"type\": \"keyword\"}\n" - + " }\n" - + "}"; + // Index mapping paths + public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml-model-group.json"; + public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml-model.json"; + public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml-task.json"; + public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml-connector.json"; + public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml-config.json"; + public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = 
"index-mappings/ml-controller.json"; + public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml-agent.json"; + public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml-memory-meta.json"; + public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml-memory-message.json"; - public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + INTERACTIONS_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + INTERACTIONS_CONVERSATION_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + INTERACTIONS_INPUT_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_PROMPT_TEMPLATE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_RESPONSE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_ORIGIN_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + PARENT_INTERACTIONS_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_TRACE_NUMBER_FIELD - + "\": {\"type\": \"long\"}\n" - + " }\n" - + "}"; // Calculate Versions independently of OpenSearch core version public static final Version VERSION_2_11_0 = Version.fromString("2.11.0"); public static final Version VERSION_2_12_0 = Version.fromString("2.12.0"); diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java b/common/src/main/java/org/opensearch/ml/common/MLIndex.java similarity index 56% rename from ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java rename to common/src/main/java/org/opensearch/ml/common/MLIndex.java index 0cc329f1ac..c497452c6b 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java +++ 
b/common/src/main/java/org/opensearch/ml/common/MLIndex.java @@ -3,46 +3,42 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.ml.engine.indices; +package org.opensearch.ml.common; import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING; -import static 
org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING_PATH; + +import java.io.IOException; +import java.io.UncheckedIOException; + +import org.opensearch.ml.common.utils.IndexUtils; public enum MLIndex { - MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING, ML_MODEL_GROUP_INDEX_SCHEMA_VERSION), - MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING, ML_MODEL_INDEX_SCHEMA_VERSION), - TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING, ML_TASK_INDEX_SCHEMA_VERSION), - CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING, ML_CONNECTOR_SCHEMA_VERSION), - CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING, ML_CONFIG_INDEX_SCHEMA_VERSION), - CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING, ML_CONTROLLER_INDEX_SCHEMA_VERSION), - AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING, ML_AGENT_INDEX_SCHEMA_VERSION), - MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING, 
ML_MEMORY_META_INDEX_SCHEMA_VERSION), - MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING, ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION); + MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING_PATH), + MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING_PATH), + TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING_PATH), + CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING_PATH), + CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING_PATH), + CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING_PATH), + AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING_PATH), + MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING_PATH), + MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH); private final String indexName; // whether we use an alias for the index @@ -50,11 +46,24 @@ public enum MLIndex { private final String mapping; private final Integer version; - MLIndex(String name, boolean alias, String mapping, Integer version) { + MLIndex(String name, boolean alias, String mappingPath) { this.indexName = name; this.alias = alias; - this.mapping = mapping; - this.version = version; + this.mapping = getMapping(mappingPath); + this.version = IndexUtils.getVersionFromMapping(this.mapping); + } + + private String getMapping(String mappingPath) { + if (mappingPath == null) { + throw new IllegalArgumentException("Mapping path cannot be null"); + } + + try { + return IndexUtils.getMappingFromFile(mappingPath); + } catch (IOException e) { + // Unchecked exception is thrown since the method is being called within a constructor + throw new UncheckedIOException("Failed to fetch index mapping from file: " + mappingPath, e); + } } public String getIndexName() { diff --git a/common/src/main/java/org/opensearch/ml/common/connector/MLPreProcessFunction.java b/common/src/main/java/org/opensearch/ml/common/connector/MLPreProcessFunction.java index 3a5a3427a8..723da8c07d 100644 
--- a/common/src/main/java/org/opensearch/ml/common/connector/MLPreProcessFunction.java +++ b/common/src/main/java/org/opensearch/ml/common/connector/MLPreProcessFunction.java @@ -11,6 +11,7 @@ import org.opensearch.ml.common.connector.functions.preprocess.BedrockEmbeddingPreProcessFunction; import org.opensearch.ml.common.connector.functions.preprocess.CohereEmbeddingPreProcessFunction; +import org.opensearch.ml.common.connector.functions.preprocess.CohereMultiModalEmbeddingPreProcessFunction; import org.opensearch.ml.common.connector.functions.preprocess.CohereRerankPreProcessFunction; import org.opensearch.ml.common.connector.functions.preprocess.MultiModalConnectorPreProcessFunction; import org.opensearch.ml.common.connector.functions.preprocess.OpenAIEmbeddingPreProcessFunction; @@ -21,6 +22,7 @@ public class MLPreProcessFunction { private static final Map> PRE_PROCESS_FUNCTIONS = new HashMap<>(); public static final String TEXT_DOCS_TO_COHERE_EMBEDDING_INPUT = "connector.pre_process.cohere.embedding"; + public static final String IMAGE_TO_COHERE_MULTI_MODAL_EMBEDDING_INPUT = "connector.pre_process.cohere.multimodal_embedding"; public static final String TEXT_DOCS_TO_OPENAI_EMBEDDING_INPUT = "connector.pre_process.openai.embedding"; public static final String TEXT_DOCS_TO_BEDROCK_EMBEDDING_INPUT = "connector.pre_process.bedrock.embedding"; public static final String TEXT_IMAGE_TO_BEDROCK_EMBEDDING_INPUT = "connector.pre_process.bedrock.multimodal_embedding"; @@ -37,7 +39,10 @@ public class MLPreProcessFunction { BedrockEmbeddingPreProcessFunction bedrockEmbeddingPreProcessFunction = new BedrockEmbeddingPreProcessFunction(); CohereRerankPreProcessFunction cohereRerankPreProcessFunction = new CohereRerankPreProcessFunction(); MultiModalConnectorPreProcessFunction multiModalEmbeddingPreProcessFunction = new MultiModalConnectorPreProcessFunction(); + CohereMultiModalEmbeddingPreProcessFunction cohereMultiModalEmbeddingPreProcessFunction = + new 
CohereMultiModalEmbeddingPreProcessFunction(); PRE_PROCESS_FUNCTIONS.put(TEXT_DOCS_TO_COHERE_EMBEDDING_INPUT, cohereEmbeddingPreProcessFunction); + PRE_PROCESS_FUNCTIONS.put(IMAGE_TO_COHERE_MULTI_MODAL_EMBEDDING_INPUT, cohereMultiModalEmbeddingPreProcessFunction); PRE_PROCESS_FUNCTIONS.put(TEXT_IMAGE_TO_BEDROCK_EMBEDDING_INPUT, multiModalEmbeddingPreProcessFunction); PRE_PROCESS_FUNCTIONS.put(TEXT_DOCS_TO_OPENAI_EMBEDDING_INPUT, openAIEmbeddingPreProcessFunction); PRE_PROCESS_FUNCTIONS.put(TEXT_DOCS_TO_DEFAULT_EMBEDDING_INPUT, openAIEmbeddingPreProcessFunction); diff --git a/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java new file mode 100644 index 0000000000..31180d7ef8 --- /dev/null +++ b/common/src/main/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunction.java @@ -0,0 +1,51 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.ml.common.connector.functions.preprocess; + +import static org.opensearch.ml.common.utils.StringUtils.convertScriptStringToJsonString; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.opensearch.ml.common.dataset.TextDocsInputDataSet; +import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet; +import org.opensearch.ml.common.input.MLInput; + +public class CohereMultiModalEmbeddingPreProcessFunction extends ConnectorPreProcessFunction { + + public CohereMultiModalEmbeddingPreProcessFunction() { + this.returnDirectlyForRemoteInferenceInput = true; + } + + @Override + public void validate(MLInput mlInput) { + validateTextDocsInput(mlInput); + List docs = ((TextDocsInputDataSet) mlInput.getInputDataset()).getDocs(); + if (docs == null || docs.isEmpty() || docs.get(0) == 
null) { + throw new IllegalArgumentException("No image provided"); + } + + } + + @Override + public RemoteInferenceInputDataSet process(MLInput mlInput) { + TextDocsInputDataSet inputData = (TextDocsInputDataSet) mlInput.getInputDataset(); + Map parametersMap = new HashMap<>(); + + /** + * Cohere multi-modal model expects either image or texts, not both. + * For image, customer can use this pre-process function. For texts, customer can use + * connector.pre_process.cohere.embedding + * Cohere expects An array of image data URIs for the model to embed. Maximum number of images per call is 1. + */ + parametersMap.put("images", inputData.getDocs()); + return RemoteInferenceInputDataSet + .builder() + .parameters(convertScriptStringToJsonString(Map.of("parameters", parametersMap))) + .build(); + } +} diff --git a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java index ac639babb2..88f4920761 100644 --- a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java +++ b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java @@ -18,15 +18,15 @@ package org.opensearch.ml.common.conversation; import org.opensearch.common.settings.Setting; +import org.opensearch.ml.common.MLIndex; /** * Class containing a bunch of constant defining how the conversational indices are formatted + * ToDo: use MLIndex.MEMORY_MESSAGE and MLIndex.MEMORY_META directly for index names and mappings rather than constants */ public class ConversationalIndexConstants { - /** Version of the meta index schema */ - public final static Integer META_INDEX_SCHEMA_VERSION = 2; /** Name of the conversational metadata index */ - public final static String META_INDEX_NAME = ".plugins-ml-memory-meta"; + public final static String META_INDEX_NAME = MLIndex.MEMORY_META.getIndexName(); /** Name of the metadata 
field for initial timestamp */ public final static String META_CREATED_TIME_FIELD = "create_time"; /** Name of the metadata field for updated timestamp */ @@ -41,38 +41,10 @@ public class ConversationalIndexConstants { public final static String META_ADDITIONAL_INFO_FIELD = "additional_info"; /** Mappings for the conversational metadata index */ - public final static String META_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + META_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + META_NAME_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + META_CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + META_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + USER_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + APPLICATION_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + META_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"}\n" - + " }\n" - + "}"; + public final static String META_MAPPING = MLIndex.MEMORY_META.getMapping(); - /** Version of the interactions index schema */ - public final static Integer INTERACTIONS_INDEX_SCHEMA_VERSION = 1; /** Name of the conversational interactions index */ - public final static String INTERACTIONS_INDEX_NAME = ".plugins-ml-memory-message"; + public final static String INTERACTIONS_INDEX_NAME = MLIndex.MEMORY_MESSAGE.getIndexName(); /** Name of the interaction field for the conversation Id */ public final static String INTERACTIONS_CONVERSATION_ID_FIELD = "memory_id"; /** Name of the interaction field for the human input */ @@ -92,42 +64,7 @@ public class ConversationalIndexConstants { /** The trace number of an interaction */ public final static String INTERACTIONS_TRACE_NUMBER_FIELD = "trace_number"; /** Mappings for the interactions index */ - public final static String INTERACTIONS_MAPPINGS = "{\n" - + " \"_meta\": {\n" - 
+ " \"schema_version\": " - + INTERACTIONS_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + INTERACTIONS_CONVERSATION_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + INTERACTIONS_INPUT_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_PROMPT_TEMPLATE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_RESPONSE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_ORIGIN_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + PARENT_INTERACTIONS_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_TRACE_NUMBER_FIELD - + "\": {\"type\": \"long\"}\n" - + " }\n" - + "}"; + public final static String INTERACTIONS_MAPPINGS = MLIndex.MEMORY_MESSAGE.getMapping(); /** Feature Flag setting for conversational memory */ public static final Setting ML_COMMONS_MEMORY_FEATURE_ENABLED = Setting diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 298bd3ec96..92ccb07bf9 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -5,8 +5,15 @@ package org.opensearch.ml.common.utils; +import java.io.IOException; +import java.net.URL; import java.util.Map; +import com.google.common.base.Charsets; +import com.google.common.io.Resources; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; + import lombok.extern.log4j.Log4j2; @Log4j2 @@ -32,4 +39,40 @@ public class IndexUtils { // Note: This does not include static settings like number of shards, which can't be changed after index creation. 
public static final Map UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1"); public static final Map UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all"); + + public static String getMappingFromFile(String path) throws IOException { + URL url = IndexUtils.class.getClassLoader().getResource(path); + if (url == null) { + throw new IOException("Resource not found: " + path); + } + + String mapping = Resources.toString(url, Charsets.UTF_8).trim(); + if (mapping.isEmpty() || !StringUtils.isJson(mapping)) { + throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path); + } + + return mapping; + } + + public static Integer getVersionFromMapping(String mapping) { + if (mapping == null || mapping.isBlank()) { + throw new IllegalArgumentException("Mapping cannot be null or empty"); + } + + JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping); + if (mappingJson == null || !mappingJson.has("_meta")) { + throw new JsonParseException("Failed to find \"_meta\" object in mapping: " + mapping); + } + + JsonObject metaObject = mappingJson.getAsJsonObject("_meta"); + if (metaObject == null || !metaObject.has("schema_version")) { + throw new JsonParseException("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping); + } + + try { + return metaObject.get("schema_version").getAsInt(); + } catch (NumberFormatException | ClassCastException e) { + throw new JsonParseException("Invalid \"schema_version\" value in mapping: " + mapping, e); + } + } } diff --git a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java index 37bfac6f3f..fcc0c4c3c9 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java @@ -26,6 +26,7 @@ import com.google.gson.Gson; import com.google.gson.JsonElement; +import 
com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.google.gson.JsonSyntaxException; import com.jayway.jsonpath.JsonPath; @@ -53,12 +54,16 @@ public class StringUtils { } public static final String TO_STRING_FUNCTION_NAME = ".toString()"; - public static boolean isValidJsonString(String Json) { + public static boolean isValidJsonString(String json) { + if (json == null || json.isBlank()) { + return false; + } + try { - new JSONObject(Json); + new JSONObject(json); } catch (JSONException ex) { try { - new JSONArray(Json); + new JSONArray(json); } catch (JSONException ex1) { return false; } @@ -67,6 +72,10 @@ public static boolean isValidJsonString(String Json) { } public static boolean isJson(String json) { + if (json == null || json.isBlank()) { + return false; + } + try { if (!isValidJsonString(json)) { return false; @@ -319,4 +328,12 @@ public static boolean isValidJSONPath(String input) { } } + public static JsonObject getJsonObjectFromString(String jsonString) { + if (jsonString == null || jsonString.isBlank()) { + throw new IllegalArgumentException("Json cannot be null or empty"); + } + + return JsonParser.parseString(jsonString).getAsJsonObject(); + } + } diff --git a/common/src/main/resources/index-mappings/ml-agent.json b/common/src/main/resources/index-mappings/ml-agent.json new file mode 100644 index 0000000000..2bcee6bc3b --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-agent.json @@ -0,0 +1,45 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "type": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "llm": { + "type": "flat_object" + }, + "tools": { + "type": "flat_object" + }, + "parameters": { + "type": "flat_object" + }, + "memory": { + "type": "flat_object" + }, + "is_hidden": { + "type": "boolean" + }, + "created_time": { + "type": "date", + "format": 
"strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-config.json b/common/src/main/resources/index-mappings/ml-config.json new file mode 100644 index 0000000000..6d36d8efb7 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-config.json @@ -0,0 +1,24 @@ +{ + "_meta": { + "schema_version": 4 + }, + "properties": { + "master_key": { + "type": "keyword" + }, + "config_type": { + "type": "keyword" + }, + "ml_configuration": { + "type": "flat_object" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-connector.json b/common/src/main/resources/index-mappings/ml-connector.json new file mode 100644 index 0000000000..4be168c4b9 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-connector.json @@ -0,0 +1,95 @@ +{ + "_meta": { + "schema_version": 3 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "protocol": { + "type": "keyword" + }, + "parameters": { + "type": "flat_object" + }, + "credential": { + "type": "flat_object" + }, + "client_config": { + "type": "flat_object" + }, + "actions": { + "type": "flat_object" + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "access": { + "type": "keyword" + }, + "owner": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": 
{ + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-controller.json b/common/src/main/resources/index-mappings/ml-controller.json new file mode 100644 index 0000000000..6822fb19c5 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-controller.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "schema_version": 1 + }, + "properties": { + "user_rate_limiter": { + "type": "flat_object" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-memory-message.json b/common/src/main/resources/index-mappings/ml-memory-message.json new file mode 100644 index 0000000000..10b081aee1 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-memory-message.json @@ -0,0 +1,35 @@ +{ + "_meta": { + "schema_version": 1 + }, + "properties": { + "memory_id": { + "type": "keyword" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "input": { + "type": "text" + }, + "prompt_template": { + "type": "text" + }, + "response": { + "type": "text" + }, + "origin": { + "type": "keyword" + }, + "additional_info": { + "type": "flat_object" + }, + "parent_message_id": { + "type": "keyword" + }, + "trace_number": { + "type": "long" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-memory-meta.json b/common/src/main/resources/index-mappings/ml-memory-meta.json new file mode 100644 index 0000000000..7684e25d06 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-memory-meta.json @@ -0,0 +1,27 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text" + }, + "create_time": { + "type": "date", + 
"format": "strict_date_time||epoch_millis" + }, + "updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "user": { + "type": "keyword" + }, + "application_type": { + "type": "keyword" + }, + "additional_info": { + "type": "flat_object" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-model-group.json b/common/src/main/resources/index-mappings/ml-model-group.json new file mode 100644 index 0000000000..7e2437e534 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-model-group.json @@ -0,0 +1,83 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "description": { + "type": "text" + }, + "latest_version": { + "type": "integer" + }, + "model_group_id": { + "type": "keyword" + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "access": { + "type": "keyword" + }, + "owner": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-model.json b/common/src/main/resources/index-mappings/ml-model.json new file mode 100644 index 0000000000..b996e463cd --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-model.json @@ -0,0 +1,243 @@ +{ + "_meta": { + 
"schema_version": 11 + }, + "properties": { + "algorithm": { + "type": "keyword" + }, + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "long" + }, + "model_version": { + "type": "keyword" + }, + "model_group_id": { + "type": "keyword" + }, + "model_content": { + "type": "binary" + }, + "chunk_number": { + "type": "long" + }, + "total_chunks": { + "type": "long" + }, + "model_id": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "model_format": { + "type": "keyword" + }, + "model_state": { + "type": "keyword" + }, + "model_content_size_in_bytes": { + "type": "long" + }, + "planning_worker_node_count": { + "type": "integer" + }, + "current_worker_node_count": { + "type": "integer" + }, + "planning_worker_nodes": { + "type": "keyword" + }, + "deploy_to_all_nodes": { + "type": "boolean" + }, + "is_hidden": { + "type": "boolean" + }, + "model_config": { + "properties": { + "model_type": { + "type": "keyword" + }, + "embedding_dimension": { + "type": "integer" + }, + "framework_type": { + "type": "keyword" + }, + "pooling_mode": { + "type": "keyword" + }, + "normalize_result": { + "type": "boolean" + }, + "model_max_length": { + "type": "integer" + }, + "all_config": { + "type": "text" + } + } + }, + "deploy_setting": { + "type": "flat_object" + }, + "is_enabled": { + "type": "boolean" + }, + "is_controller_enabled": { + "type": "boolean" + }, + "rate_limiter": { + "type": "flat_object" + }, + "model_content_hash_value": { + "type": "keyword" + }, + "auto_redeploy_retry_times": { + "type": "integer" + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_registered_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_deployed_time": { + "type": "date", + "format": 
"strict_date_time||epoch_millis" + }, + "last_undeployed_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "interface": { + "type": "flat_object" + }, + "guardrails": { + "properties": { + "input_guardrail": { + "properties": { + "regex": { + "type": "text" + }, + "stop_words": { + "properties": { + "index_name": { + "type": "text" + }, + "source_fields": { + "type": "text" + } + } + } + } + }, + "output_guardrail": { + "properties": { + "regex": { + "type": "text" + }, + "stop_words": { + "properties": { + "index_name": { + "type": "text" + }, + "source_fields": { + "type": "text" + } + } + } + } + } + } + }, + "connector": { + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "protocol": { + "type": "keyword" + }, + "parameters": { + "type": "flat_object" + }, + "credential": { + "type": "flat_object" + }, + "client_config": { + "type": "flat_object" + }, + "actions": { + "type": "flat_object" + } + } + }, + "user": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-task.json b/common/src/main/resources/index-mappings/ml-task.json new file mode 100644 index 0000000000..ad428724bf --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-task.json @@ -0,0 +1,86 @@ +{ + "_meta": { + "schema_version": 3 + }, + "properties": { + "model_id": { + "type": "keyword" + }, + "task_type": { + "type": 
"keyword" + }, + "function_name": { + "type": "keyword" + }, + "state": { + "type": "keyword" + }, + "input_type": { + "type": "keyword" + }, + "progress": { + "type": "float" + }, + "output_index": { + "type": "keyword" + }, + "worker_node": { + "type": "keyword" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_update_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "error": { + "type": "text" + }, + "is_async": { + "type": "boolean" + }, + "remote_job": { + "type": "flat_object" + }, + "user": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + } + } +} diff --git a/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java new file mode 100644 index 0000000000..2334811690 --- /dev/null +++ b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/CohereMultiModalEmbeddingPreProcessFunctionTest.java @@ -0,0 +1,89 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.ml.common.connector.functions.preprocess; + +import static org.junit.Assert.assertEquals; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.opensearch.ml.common.FunctionName; 
+import org.opensearch.ml.common.dataset.TextDocsInputDataSet; +import org.opensearch.ml.common.dataset.TextSimilarityInputDataSet; +import org.opensearch.ml.common.dataset.remote.RemoteInferenceInputDataSet; +import org.opensearch.ml.common.input.MLInput; + +public class CohereMultiModalEmbeddingPreProcessFunctionTest { + @Rule + public ExpectedException exceptionRule = ExpectedException.none(); + + CohereMultiModalEmbeddingPreProcessFunction function; + + TextSimilarityInputDataSet textSimilarityInputDataSet; + TextDocsInputDataSet textDocsInputDataSet; + RemoteInferenceInputDataSet remoteInferenceInputDataSet; + + MLInput textEmbeddingInput; + MLInput textSimilarityInput; + MLInput remoteInferenceInput; + + @Before + public void setUp() { + function = new CohereMultiModalEmbeddingPreProcessFunction(); + textSimilarityInputDataSet = TextSimilarityInputDataSet.builder().queryText("test").textDocs(List.of("hello")).build(); + textDocsInputDataSet = TextDocsInputDataSet.builder().docs(List.of("imageString")).build(); + remoteInferenceInputDataSet = RemoteInferenceInputDataSet.builder().parameters(Map.of("images", "value2")).build(); + + textEmbeddingInput = MLInput.builder().algorithm(FunctionName.TEXT_EMBEDDING).inputDataset(textDocsInputDataSet).build(); + textSimilarityInput = MLInput.builder().algorithm(FunctionName.TEXT_SIMILARITY).inputDataset(textSimilarityInputDataSet).build(); + remoteInferenceInput = MLInput.builder().algorithm(FunctionName.REMOTE).inputDataset(remoteInferenceInputDataSet).build(); + } + + @Test + public void testProcess_whenNullInput_expectIllegalArgumentException() { + exceptionRule.expect(IllegalArgumentException.class); + exceptionRule.expectMessage("Preprocess function input can't be null"); + function.apply(null); + } + + @Test + public void testProcess_whenWrongInput_expectIllegalArgumentException() { + exceptionRule.expect(IllegalArgumentException.class); + exceptionRule.expectMessage("This pre_process_function can only support 
TextDocsInputDataSet"); + function.apply(textSimilarityInput); + } + + @Test + public void testProcess_whenCorrectInput_expectCorrectOutput() { + MLInput mlInput = MLInput.builder().algorithm(FunctionName.TEXT_EMBEDDING).inputDataset(textDocsInputDataSet).build(); + RemoteInferenceInputDataSet dataSet = function.apply(mlInput); + assertEquals(1, dataSet.getParameters().size()); + assertEquals("[\"imageString\"]", dataSet.getParameters().get("images")); + + } + + @Test + public void testProcess_whenInputTextIsnull_expectIllegalArgumentException() { + exceptionRule.expect(IllegalArgumentException.class); + exceptionRule.expectMessage("No image provided"); + List docs = new ArrayList<>(); + docs.add(null); + TextDocsInputDataSet textDocsInputDataSet1 = TextDocsInputDataSet.builder().docs(docs).build(); + MLInput mlInput = MLInput.builder().algorithm(FunctionName.TEXT_EMBEDDING).inputDataset(textDocsInputDataSet1).build(); + RemoteInferenceInputDataSet dataSet = function.apply(mlInput); + } + + @Test + public void testProcess_whenRemoteInferenceInput_expectRemoteInferenceInputDataSet() { + RemoteInferenceInputDataSet dataSet = function.apply(remoteInferenceInput); + assertEquals(remoteInferenceInputDataSet, dataSet); + } +} diff --git a/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/MultiModalConnectorPreProcessFunctionTest.java b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/MultiModalConnectorPreProcessFunctionTest.java index 6ea8da20f9..4442ff7339 100644 --- a/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/MultiModalConnectorPreProcessFunctionTest.java +++ b/common/src/test/java/org/opensearch/ml/common/connector/functions/preprocess/MultiModalConnectorPreProcessFunctionTest.java @@ -39,7 +39,7 @@ public class MultiModalConnectorPreProcessFunctionTest { @Before public void setUp() { function = new MultiModalConnectorPreProcessFunction(); - textSimilarityInputDataSet = 
TextSimilarityInputDataSet.builder().queryText("test").textDocs(Arrays.asList("hello")).build(); + textSimilarityInputDataSet = TextSimilarityInputDataSet.builder().queryText("test").textDocs(List.of("hello")).build(); textDocsInputDataSet = TextDocsInputDataSet.builder().docs(Arrays.asList("hello", "world")).build(); remoteInferenceInputDataSet = RemoteInferenceInputDataSet .builder() diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index 8cfad37c98..a4b3badacf 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -6,11 +6,15 @@ package org.opensearch.ml.common.utils; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import java.io.IOException; import java.util.Map; import org.junit.Test; +import com.google.gson.JsonParseException; + public class IndexUtilsTest { @Test @@ -42,4 +46,110 @@ public void testUpdatedAllNodesReplicaIndexSettingsContainsExpectedValues() { assertEquals("index.auto_expand_replicas should be 0-all", updatedIndexSettings.get("index.auto_expand_replicas"), "0-all"); assertEquals("INDEX_SETTINGS should contain exactly 1 settings", 1, updatedIndexSettings.size()); } + + @Test + public void testGetMappingFromFile() { + String expectedMapping = "{\n" + + " \"_meta\": {\n" + + " \"schema_version\": \"1\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + try { + String actualMapping = IndexUtils.getMappingFromFile("index-mappings/test-mapping.json"); + // comparing JsonObjects to avoid issues caused by eol character in different OS + 
assertEquals(StringUtils.getJsonObjectFromString(expectedMapping), StringUtils.getJsonObjectFromString(actualMapping)); + } catch (IOException e) { + throw new RuntimeException("Failed to read file at path: index-mappings/test-mapping.json"); + } + } + + @Test + public void testGetMappingFromFileFileNotFound() { + String path = "index-mappings/test-mapping-not-found.json"; + IOException e = assertThrows(IOException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Resource not found: " + path, e.getMessage()); + } + + @Test + public void testGetMappingFromFilesMalformedJson() { + String path = "index-mappings/test-mapping-malformed.json"; + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Invalid or non-JSON mapping at: " + path, e.getMessage()); + } + + @Test + public void testGetVersionFromMapping() { + Integer expectedVersion = 1; + String mapping = "{\n" + + " \"_meta\": {\n" + + " \"schema_version\": \"1\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + assertEquals(expectedVersion, IndexUtils.getVersionFromMapping(mapping)); + } + + @Test + public void testGetVersionFromMappingNoMeta() { + String mapping = "{\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); + assertEquals("Failed to find \"_meta\" object in mapping: " + mapping, e.getMessage()); + } + + @Test + public void 
testGetVersionFromMappingNoSchemaVersion() { + String mapping = "{\n" + + " \"_meta\": {\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); + assertEquals("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping, e.getMessage()); + } } diff --git a/common/src/test/resources/index-mappings/test-mapping-malformed.json b/common/src/test/resources/index-mappings/test-mapping-malformed.json new file mode 100644 index 0000000000..f87e98da9b --- /dev/null +++ b/common/src/test/resources/index-mappings/test-mapping-malformed.json @@ -0,0 +1,13 @@ +{ + "_meta": { + "schema_version": "1" + }, + "properties": { + "test_field_1": { + "type": "test_type_1" + }, + { + "malformed": } + } + } +} diff --git a/common/src/test/resources/index-mappings/test-mapping.json b/common/src/test/resources/index-mappings/test-mapping.json new file mode 100644 index 0000000000..6114de4687 --- /dev/null +++ b/common/src/test/resources/index-mappings/test-mapping.json @@ -0,0 +1,16 @@ +{ + "_meta": { + "schema_version": "1" + }, + "properties": { + "test_field_1": { + "type": "test_type_1" + }, + "test_field_2": { + "type": "test_type_2" + }, + "test_field_3": { + "type": "test_type_3" + } + } +} diff --git a/docs/remote_inference_blueprints/cohere_connector_embedding_blueprint.md b/docs/remote_inference_blueprints/cohere_connector_embedding_blueprint.md index fe910d0c79..4386251c00 100644 --- a/docs/remote_inference_blueprints/cohere_connector_embedding_blueprint.md +++ b/docs/remote_inference_blueprints/cohere_connector_embedding_blueprint.md @@ -1,6 +1,6 @@ ### Cohere Embedding Connector Blueprint: -This blueprint will 
show you how to connect a Cohere embedding model to your Opensearch cluster, including creating a k-nn index and your own Embedding pipeline. You will require a Cohere API key to create a connector. +This blueprint will show you how to connect a Cohere embedding model to your OpenSearch cluster, including creating a k-nn index and your own Embedding pipeline. You will require a Cohere API key to create a connector. Cohere currently offers the following Embedding models (with model name and embedding dimensions). Note that only the following have been tested with the blueprint guide. @@ -97,7 +97,7 @@ The last step is to deploy your model. Use the `model_id` returned by the regist POST /_plugins/_ml/models//_deploy ``` -This will once again spawn a task to deploy your Model, with a response that will look like: +This will once again spawn a task to deploy your model, with a response that will look like: ```json { @@ -113,11 +113,11 @@ You can run the GET tasks request again to verify the status. GET /_plugins/_ml/tasks/ ``` -Once this is complete, your Model is deployed and ready! +Once this is complete, your model is deployed and ready! ##### 1e. Test model -You can try this request to test that the Model behaves correctly: +You can try this request to test that the model behaves correctly: ```json POST /_plugins/_ml/models//_predict diff --git a/docs/remote_inference_blueprints/cohere_connector_image_embedding_blueprint.md b/docs/remote_inference_blueprints/cohere_connector_image_embedding_blueprint.md new file mode 100644 index 0000000000..06af2c9b90 --- /dev/null +++ b/docs/remote_inference_blueprints/cohere_connector_image_embedding_blueprint.md @@ -0,0 +1,324 @@ +### Cohere Embedding Connector Blueprint: + +This blueprint will show you how to connect a Cohere multi-modal embedding model to your OpenSearch cluster, including creating a k-nn index and your own Embedding pipeline. You will require a Cohere API key to create a connector. 
+ +Cohere currently offers the following Embedding models (with model name and embedding dimensions). Note that only the following have been tested with the blueprint guide. + +- embed-english-v3.0 1024 +- embed-english-v2.0 4096 + +See [Cohere's /embed API docs](https://docs.cohere.com/reference/embed) for more details. + +#### 1. Create a connector and model group + +##### 1a. Register model group + +```json +POST /_plugins/_ml/model_groups/_register + +{ + "name": "cohere_model_group", + "description": "Your Cohere model group" +} +``` + +This request response will return the `model_group_id`, note it down. + +##### 1b. Create a connector + +See above for all the values the `parameters > model` parameter can take. + +```json +POST /_plugins/_ml/connectors/_create +{ + "name": "Cohere Embed Model", + "description": "The connector to Cohere's public embed API", + "version": "1", + "protocol": "http", + "credential": { + "cohere_key": "" + }, + "parameters": { + "model": "", // Choose a Model from the provided list above + "input_type":"image", + "truncate": "END" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "https://api.cohere.ai/v1/embed", + "headers": { + "Authorization": "Bearer ${credential.cohere_key}", + "Request-Source": "unspecified:opensearch" + }, + "request_body": "{ \"images\": ${parameters.images}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }", + "pre_process_function": "connector.pre_process.cohere.multimodal_embedding", + "post_process_function": "connector.post_process.cohere.embedding" + } + ] +} +``` + +This request response will return the `connector_id`, note it down. + +##### 1c. Register a model with your connector + +You can now register your model with the `model_group_id` and `connector_id` created from the previous steps. 
+ +```json +POST /_plugins/_ml/models/_register +Content-Type: application/json + +{ + "name": "Cohere Embed Model", + "function_name": "remote", + "model_group_id": "", + "description": "Your Cohere Embedding Model", + "connector_id": "" +} +``` + +This will create a registration task, the response should look like: + +```json +{ + "task_id": "9bXpRY0BRil1qhQaUK-u", + "status": "CREATED", + "model_id": "9rXpRY0BRil1qhQaUK_8" +} +``` + +##### 1d. Deploy model + +The last step is to deploy your model. Use the `model_id` returned by the registration request, and run: + +```json +POST /_plugins/_ml/models//_deploy +``` + +This will once again spawn a task to deploy your model, with a response that will look like: + +```json +{ + "task_id": "97XrRY0BRil1qhQaQK_c", + "task_type": "DEPLOY_MODEL", + "status": "COMPLETED" +} +``` + +You can run the GET tasks request again to verify the status. + +```json +GET /_plugins/_ml/tasks/ +``` + +Once this is complete, your model is deployed and ready! + +##### 1e. Test model + +You can try this request to test that the model behaves correctly: + +```json +POST /_plugins/_ml/models//_predict +{ + "parameters": { + "images": [""] + } +} +``` + +It should return a response similar to this: + +```json +{ + "inference_results": [ + { + "output": [ + { + "name": "sentence_embedding", + "data_type": "FLOAT32", + "shape": [ + 1024 + ], + "data": [ + -0.0024547577, + 0.0062217712, + -0.01675415, + -0.020736694, + -0.020263672, + ... ... + 0.038635254 + ] + } + ], + "status_code": 200 + } + ] +} +``` + +#### (Optional) 2. Setup k-NN index and ingestion pipeline + +##### 2a. Create your pipeline + +It is important that the `field_map` parameter contains all the document fields you'd like to embed as a vector. The key value is the document field name, and the value will be the field containing the embedding. 
+ +```json +PUT /_ingest/pipeline/cohere-ingest-pipeline +{ + "description": "Test Cohere Embedding pipeline", + "processors": [ + { + "text_embedding": { + "model_id": "", + "field_map": { + "image_base64": "image_embedding" + } + } + } + ] +} +``` + +Sample response: + +```json +{ + "acknowledged": true +} +``` + +##### 2b. Create a k-NN index + +Here `cohere-nlp-index` is the name of your index; you can change it as needed. + +```json +PUT /cohere-nlp-index +{ + "settings": { + "index.knn": true, + "default_pipeline": "cohere-ingest-pipeline" + }, + "mappings": { + "properties": { + "id": { + "type": "text" + }, + "image_embedding": { + "type": "knn_vector", + "dimension": 1024, + "method": { + "engine": "lucene", + "space_type": "l2", + "name": "hnsw", + "parameters": {} + } + }, + "image_base64": { + "type": "text" + } + } + } +} +``` + +Sample response: + +```json +{ + "acknowledged": true, + "shards_acknowledged": true, + "index": "cohere-nlp-index" +} +``` + +##### 2c. Testing the index and pipeline + +First, you can insert a record: + +```json +PUT /cohere-nlp-index/_doc/1 +{ + "image_base64": "", + "id": "c1" +} +``` + +Sample response: + +```json +{ + "_index": "cohere-nlp-index", + "_id": "1", + "_version": 1, + "result": "created", + "_shards": { + "total": 2, + "successful": 1, + "failed": 0 + }, + "_seq_no": 0, + "_primary_term": 1 +} +``` + +The last step is to check that the embeddings were properly created. Notice that the embedding field created corresponds to the `field_map` mapping you defined in step 2a.
+ +```json +GET /cohere-nlp-index/_search + +{ + "query": { + "match_all": {} + } +} +``` + +Sample response: + +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 1, + "hits": [ + { + "_index": "cohere-nlp-index", + "_id": "1", + "_score": 1, + "_source": { + "image_base64": "", + "image_embedding": [ + 0.02494812, + -0.009391785, + -0.015716553, + -0.051849365, + -0.015930176, + -0.024734497, + -0.028518677, + -0.008323669, + -0.008323669, + ............. + + ], + "id": "c1" + } + } + ] + } +} +``` + +Congratulations! You've successfully created your ingestion pipeline. diff --git a/memory/build.gradle b/memory/build.gradle index b6198509d0..8251303158 100644 --- a/memory/build.gradle +++ b/memory/build.gradle @@ -37,6 +37,7 @@ dependencies { testImplementation "org.opensearch.test:framework:${opensearch_version}" testImplementation "org.opensearch.client:opensearch-rest-client:${opensearch_version}" testImplementation group: 'com.google.code.gson', name: 'gson', version: '2.10.1' + testImplementation group: 'org.json', name: 'json', version: '20231013' } test { diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java index e6a15ecdae..9efe9372b8 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java +++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java @@ -8,7 +8,6 @@ import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX; -import static
org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING; import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX; import static org.opensearch.ml.common.MLModel.MODEL_STATE_FIELD; @@ -40,6 +39,7 @@ import org.opensearch.ml.common.AccessMode; import org.opensearch.ml.common.CommonValue; import org.opensearch.ml.common.FunctionName; +import org.opensearch.ml.common.MLIndex; import org.opensearch.ml.common.MLModel; import org.opensearch.ml.common.MLModelGroup; import org.opensearch.ml.common.MLTask; @@ -131,7 +131,7 @@ public void execute(Input input, ActionListener actionListener = mock(ActionListener.class); remoteModel.asyncPredict(mlInput, actionListener); ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); @@ -86,7 +95,18 @@ public void predict_NullConnectorExecutor() { } @Test - public void predict_ModelDeployed_WrongInput() { + public void asyncPredict_ModelDeployed_WrongInput() { + asyncPredict_ModelDeployed_WrongInput("pre_process_function not defined in connector"); + } + + @Test + public void asyncPredict_With_RemoteInferenceInputDataSet() { + when(mlInput.getInputDataset()).thenReturn( + new RemoteInferenceInputDataSet(Collections.emptyMap(), ConnectorAction.ActionType.BATCH_PREDICT)); + asyncPredict_ModelDeployed_WrongInput("no BATCH_PREDICT action found"); + } + + private void asyncPredict_ModelDeployed_WrongInput(String expExceptionMessage) { Connector connector = createConnector(ImmutableMap.of("Authorization", "Bearer ${credential.key}")); when(mlModel.getConnector()).thenReturn(connector); remoteModel.initModel(mlModel, ImmutableMap.of(), encryptor); @@ -95,16 +115,71 @@ public void predict_ModelDeployed_WrongInput() { ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); verify(actionListener).onFailure(argumentCaptor.capture()); assert argumentCaptor.getValue() instanceof RuntimeException; - assertEquals("pre_process_function not defined in connector", 
argumentCaptor.getValue().getMessage()); + assertEquals(expExceptionMessage, argumentCaptor.getValue().getMessage()); } @Test - public void initModel_RuntimeException() { - exceptionRule.expect(IllegalArgumentException.class); - exceptionRule.expectMessage("Tag mismatch!"); + public void asyncPredict_Failure_With_RuntimeException() { + asyncPredict_Failure_With_Throwable( + new RuntimeException("Remote Connection Exception!"), + RuntimeException.class, + "Remote Connection Exception!" + ); + } + + @Test + public void asyncPredict_Failure_With_Throwable() { + asyncPredict_Failure_With_Throwable( + new Error("Remote Connection Error!"), + MLException.class, + "java.lang.Error: Remote Connection Error!" + ); + } + + private void asyncPredict_Failure_With_Throwable( + Throwable actualException, + Class expExceptionClass, + String expExceptionMessage + ) { + ActionListener actionListener = mock(ActionListener.class); + doThrow(actualException) + .when(remoteConnectorExecutor) + .executeAction(ConnectorAction.ActionType.PREDICT.toString(), mlInput, actionListener); + try (MockedStatic loader = mockStatic(MLEngineClassLoader.class)) { + Connector connector = createConnector(ImmutableMap.of("Authorization", "Bearer ${credential.key}")); + when(mlModel.getConnector()).thenReturn(connector); + loader + .when(() -> MLEngineClassLoader.initInstance(connector.getProtocol(), connector, Connector.class)) + .thenReturn(remoteConnectorExecutor); + remoteModel.initModel(mlModel, ImmutableMap.of(), encryptor); + remoteModel.asyncPredict(mlInput, actionListener); + ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Exception.class); + verify(actionListener).onFailure(argumentCaptor.capture()); + assert expExceptionClass.isInstance(argumentCaptor.getValue()); + assertEquals(expExceptionMessage, argumentCaptor.getValue().getMessage()); + } + } + + @Test + public void initModel_Failure_With_RuntimeException() { + initModel_Failure_With_Throwable(new IllegalArgumentException("Tag 
mismatch!"), IllegalArgumentException.class, "Tag mismatch!"); + } + + @Test + public void initModel_Failure_With_Throwable() { + initModel_Failure_With_Throwable(new Error("Decryption Error!"), MLException.class, "Decryption Error!"); + } + + private void initModel_Failure_With_Throwable( + Throwable actualException, + Class expExcepClass, + String expExceptionMessage + ) { + exceptionRule.expect(expExcepClass); + exceptionRule.expectMessage(expExceptionMessage); Connector connector = createConnector(null); when(mlModel.getConnector()).thenReturn(connector); - doThrow(new IllegalArgumentException("Tag mismatch!")).when(encryptor).decrypt(any()); + doThrow(actualException).when(encryptor).decrypt(any()); remoteModel.initModel(mlModel, ImmutableMap.of(), encryptor); } @@ -129,7 +204,6 @@ public void initModel_WithHeader() { Assert.assertNotNull(executor.getConnector().getDecryptedHeaders()); assertEquals(1, executor.getConnector().getDecryptedHeaders().size()); assertEquals("Bearer test_api_key", executor.getConnector().getDecryptedHeaders().get("Authorization")); - remoteModel.close(); Assert.assertNull(remoteModel.getConnectorExecutor()); } diff --git a/ml-algorithms/src/test/java/org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java b/ml-algorithms/src/test/java/org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java index 021397fae0..2026a203b9 100644 --- a/ml-algorithms/src/test/java/org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java +++ b/ml-algorithms/src/test/java/org/opensearch/ml/engine/indices/MLIndicesHandlerTest.java @@ -96,7 +96,7 @@ public void setUp() { when(agentindexMetadata.mapping()).thenReturn(agentmappingMetadata); when(memorymetaindexMetadata.mapping()).thenReturn(memorymappingMetadata); when(agentmappingMetadata.getSourceAsMap()).thenReturn(Map.of(META, Map.of(SCHEMA_VERSION_FIELD, Integer.valueOf(2)))); - when(memorymappingMetadata.getSourceAsMap()).thenReturn(Map.of(META, Map.of(SCHEMA_VERSION_FIELD, Integer.valueOf(1)))); + 
when(memorymappingMetadata.getSourceAsMap()).thenReturn(Map.of(META, Map.of(SCHEMA_VERSION_FIELD, Integer.valueOf(2)))); settings = Settings.builder().put("test_key", 10).build(); threadContext = new ThreadContext(settings); when(client.threadPool()).thenReturn(threadPool); diff --git a/plugin/src/test/java/org/opensearch/ml/rest/MLCommonsRestTestCase.java b/plugin/src/test/java/org/opensearch/ml/rest/MLCommonsRestTestCase.java index 2092b9f4b4..6f710ea1de 100644 --- a/plugin/src/test/java/org/opensearch/ml/rest/MLCommonsRestTestCase.java +++ b/plugin/src/test/java/org/opensearch/ml/rest/MLCommonsRestTestCase.java @@ -1002,15 +1002,11 @@ public String registerRemoteModel(String createConnectorInput, String modelName, String connectorId = (String) responseMap.get("connector_id"); response = RestMLRemoteInferenceIT.registerRemoteModel(modelName, modelName, connectorId); responseMap = parseResponseToMap(response); - String taskId = (String) responseMap.get("task_id"); - waitForTask(taskId, MLTaskState.COMPLETED); - response = RestMLRemoteInferenceIT.getTask(taskId); - responseMap = parseResponseToMap(response); String modelId = (String) responseMap.get("model_id"); if (deploy) { response = RestMLRemoteInferenceIT.deployRemoteModel(modelId); responseMap = parseResponseToMap(response); - taskId = (String) responseMap.get("task_id"); + String taskId = (String) responseMap.get("task_id"); waitForTask(taskId, MLTaskState.COMPLETED); } return modelId;