Skip to content

Commit

Permalink
Add ML Stats APIs (#794)
Browse files Browse the repository at this point in the history
* Add ML get stats APIs.

Signed-off-by: Nathalie Jonathan <[email protected]>

* Added 'non-existent' to index status enum values, added predict and train property to the response schema.

Signed-off-by: Nathalie Jonathan <[email protected]>

* Added predict and train properties to the get stats response schema.

Signed-off-by: Nathalie Jonathan <[email protected]>

---------

Signed-off-by: Nathalie Jonathan <[email protected]>
  • Loading branch information
nathaliellenaa authored Jan 15, 2025
1 parent 806b25f commit 362261b
Show file tree
Hide file tree
Showing 4 changed files with 287 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- Added `GET /_plugins/_ml/models/{model_id}`, `POST /_plugins/_ml/models/_search`, `POST /_plugins/_ml/models/_unload`, `_undeploy`, `_upload`, `meta`, `_register_meta`, `POST /_plugins/_ml/models/{model_id}/_load`, `_predict`, `_unload`, `chunk/{chunk_number}`, `upload_chunk/{chunk_number}`, and `PUT /_plugins/_ml/models/{model_id}` ([#733](https://github.com/opensearch-project/opensearch-api-specification/pull/733))
- Added `GET`, `POST`, `PUT`, `DELETE /_plugins/_ml/controllers/{model_id}` ([#779](https://github.com/opensearch-project/opensearch-api-specification/pull/779))
- Added `GET /_plugins/_ml/profile`, `GET /_plugins/_ml/profile/models`, `models/{model_id}`, `tasks`, `tasks/{task_id}` ([#787](https://github.com/opensearch-project/opensearch-api-specification/pull/787))
- Added `GET /_plugins/_ml/stats`, `stats/{stat}`, `{node_id}/stats/`, `{node_id}/stats/{stat}` ([#794](https://github.com/opensearch-project/opensearch-api-specification/pull/794))

### Removed
- Removed unsupported `_common.mapping:SourceField`'s `mode` field and associated `_common.mapping:SourceFieldMode` enum ([#652](https://github.com/opensearch-project/opensearch-api-specification/pull/652))
Expand Down
68 changes: 68 additions & 0 deletions spec/namespaces/ml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,49 @@ paths:
responses:
'200':
$ref: '#/components/responses/ml.get_profile_tasks@200'
# ML stats with no path parameters (all stats, all nodes).
# Member 0 of the `ml.get_stats` operation group.
/_plugins/_ml/stats:
  get:
    operationId: ml.get_stats.0
    x-operation-group: ml.get_stats
    x-version-added: '1.3'
    description: Get stats.
    responses:
      '200':
        $ref: '#/components/responses/ml.get_stats@200'
# ML stats narrowed to a single named stat, for all nodes.
# Member 1 of the `ml.get_stats` operation group.
/_plugins/_ml/stats/{stat}:
  get:
    operationId: ml.get_stats.1
    x-operation-group: ml.get_stats
    x-version-added: '1.3'
    description: Get stats.
    parameters:
      - $ref: '#/components/parameters/ml.get_stats::path.stat'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_stats@200'
# All ML stats for one node, selected by the `node_id` path parameter.
# Member 2 of the `ml.get_stats` operation group.
# NOTE(review): this path ends with a trailing slash, unlike
# `/_plugins/_ml/stats`; the stats test story requests the same
# trailing-slash form, so confirm it is intentional before changing either.
/_plugins/_ml/{node_id}/stats/:
  get:
    operationId: ml.get_stats.2
    x-operation-group: ml.get_stats
    x-version-added: '1.3'
    description: Get stats.
    parameters:
      - $ref: '#/components/parameters/ml.get_stats::path.node_id'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_stats@200'
# A single named stat for one node; takes both `node_id` and `stat`.
# Member 3 of the `ml.get_stats` operation group.
/_plugins/_ml/{node_id}/stats/{stat}:
  get:
    operationId: ml.get_stats.3
    x-operation-group: ml.get_stats
    x-version-added: '1.3'
    description: Get stats.
    parameters:
      - $ref: '#/components/parameters/ml.get_stats::path.node_id'
      - $ref: '#/components/parameters/ml.get_stats::path.stat'
    responses:
      '200':
        $ref: '#/components/responses/ml.get_stats@200'
components:
requestBodies:
ml.register_model_group:
Expand Down Expand Up @@ -1660,6 +1703,11 @@ components:
application/json:
schema:
$ref: '../schemas/ml._common.yaml#/components/schemas/GetProfileResponse'
# 200 response referenced by all four `ml.get_stats` operations.
ml.get_stats@200:
  content:
    application/json:
      schema:
        $ref: '../schemas/ml._common.yaml#/components/schemas/GetStatsResponse'
parameters:
ml.get_model_group::path.model_group_id:
name: model_group_id
Expand Down Expand Up @@ -1901,5 +1949,25 @@ components:
name: task_id
in: path
required: true
schema:
type: string
# Required `stat` path parameter: restricts the request to one of the
# enumerated stat names below.
ml.get_stats::path.stat:
  name: stat
  in: path
  required: true
  schema:
    type: string
    enum:
      - ml_config_index_status
      - ml_connector_count
      - ml_connector_index_status
      - ml_controller_index_status
      - ml_model_count
      - ml_model_index_status
      - ml_task_index_status
# Required `node_id` path parameter: a free-form string identifying the node
# whose stats are requested.
ml.get_stats::path.node_id:
  name: node_id
  in: path
  required: true
  schema:
    type: string
142 changes: 141 additions & 1 deletion spec/schemas/ml._common.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,16 @@ components:
type: integer
format: int64
description: The estimated memory size in GPU.
deploy:
$ref: '#/components/schemas/Deploy'
register:
$ref: '#/components/schemas/Register'
undeploy:
$ref: '#/components/schemas/Undeploy'
predict:
$ref: '#/components/schemas/Predict'
train:
$ref: '#/components/schemas/Train'
PredictRequestStats:
type: object
properties:
Expand Down Expand Up @@ -1367,4 +1377,134 @@ components:
# Map whose values are `Task` objects.
# NOTE(review): keys are presumably task IDs; confirm against the API.
Tasks:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/Task'
# Response body for the `ml.get_stats` operations: top-level counts and index
# statuses, plus per-node statistics under `nodes`. The top-level stat
# property names match the values accepted by the `stat` path parameter.
GetStatsResponse:
  type: object
  properties:
    ml_model_count:
      type: integer
      format: int64
      description: The model count.
    ml_connector_index_status:
      type: string
      description: The connector index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    ml_config_index_status:
      type: string
      description: The config index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    ml_task_index_status:
      type: string
      description: The task index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    ml_connector_count:
      type: integer
      format: int64
      description: The connector count.
    ml_model_index_status:
      type: string
      description: The model index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    ml_controller_index_status:
      type: string
      description: The controller index status.
      enum:
        - green
        - non-existent
        - red
        - yellow
    # Per-node statistics; see `NodeStats`.
    nodes:
      $ref: '#/components/schemas/NodeStats'
# Map whose values are `NodeStatsDetails` objects.
# NOTE(review): keys are presumably node IDs; confirm against the API.
NodeStats:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/NodeStatsDetails'
# Statistics reported for a single node: integer counters plus nested
# per-algorithm and per-model breakdowns.
NodeStatsDetails:
  type: object
  properties:
    ml_deployed_model_count:
      type: integer
      format: int64
      description: The deployed model count.
    ml_jvm_heap_usage:
      type: integer
      format: int64
      description: The JVM heap usage.
    ml_failure_count:
      type: integer
      format: int64
      description: The failure count.
    ml_executing_task_count:
      type: integer
      format: int64
      description: The executing task count.
    ml_circuit_breaker_trigger_count:
      type: integer
      format: int64
      description: The circuit breaker trigger count.
    ml_request_count:
      type: integer
      format: int64
      description: The request count.
    algorithms:
      $ref: '#/components/schemas/Algorithms'
    # `Models` is not defined in this hunk; it is expected to exist elsewhere
    # in this schema file.
    models:
      $ref: '#/components/schemas/Models'
# Map whose values are `AlgorithmOperations` objects.
# NOTE(review): keys are presumably algorithm names; confirm against the API.
Algorithms:
  type: object
  additionalProperties:
    $ref: '#/components/schemas/AlgorithmOperations'
# Per-operation statistics for one algorithm; every property is optional and
# resolves to the same counter shape via its own named schema.
AlgorithmOperations:
  type: object
  properties:
    deploy:
      $ref: '#/components/schemas/Deploy'
    register:
      $ref: '#/components/schemas/Register'
    undeploy:
      $ref: '#/components/schemas/Undeploy'
    predict:
      $ref: '#/components/schemas/Predict'
    train:
      $ref: '#/components/schemas/Train'
# Counter set shared by the `Deploy`, `Register`, `Undeploy`, `Predict` and
# `Train` schemas.
# NOTE(review): the name looks like a typo for `ModelStats`; it is kept as-is
# because other schemas reference it by this exact name. Rename only together
# with every `$ref` that points here.
ModelStasts:
  type: object
  properties:
    ml_action_request_count:
      type: integer
      format: int64
      description: The request count.
    ml_action_failure_count:
      type: integer
      format: int64
      description: The failure count.
    ml_executing_task_count:
      type: integer
      format: int64
      description: The executing task count.
# Named aliases, one per operation, all resolving to the same `ModelStasts`
# counter shape.
Deploy:
  $ref: '#/components/schemas/ModelStasts'
Register:
  $ref: '#/components/schemas/ModelStasts'
Undeploy:
  $ref: '#/components/schemas/ModelStasts'
Predict:
  $ref: '#/components/schemas/ModelStasts'
Train:
  $ref: '#/components/schemas/ModelStasts'
77 changes: 77 additions & 0 deletions tests/plugins/ml/ml/stats.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
$schema: ../../../../json_schemas/test_story.schema.yaml

# Test story covering the four GET stats paths of the ML plugin.
description: Test the retrieval of the statistics.
version: '>= 2.7'
# The multiple-paths-detected warning is switched off for this story.
warnings:
  multiple-paths-detected: false
# Setup steps run before the chapters.
prologues:
  # Set the ML Commons JVM heap memory threshold to 100.
  - path: /_cluster/settings
    method: PUT
    request:
      payload:
        persistent:
          plugins.ml_commons.jvm_heap_memory_threshold: 100
  # Register a model and capture the resulting task ID.
  - path: /_plugins/_ml/models/_register
    id: register_model
    method: POST
    request:
      payload:
        name: huggingface/sentence-transformers/msmarco-distilbert-base-tas-b
        version: 1.0.1
        model_format: TORCH_SCRIPT
    output:
      task_id: payload.task_id
  # Fetch the registration task, retrying up to 3 times, and expect state
  # COMPLETED. Captures the model ID and the first worker node as `node_id`.
  # NOTE(review): `wait: 10000` is presumably milliseconds; confirm against
  # the test story schema.
  - path: /_plugins/_ml/tasks/{task_id}
    id: get_completed_task
    method: GET
    parameters:
      task_id: ${register_model.task_id}
    retry:
      count: 3
      wait: 10000
    response:
      status: 200
      payload:
        state: COMPLETED
    output:
      model_id: payload.model_id
      node_id: payload.worker_node[0]
# Cleanup steps: delete the registered model and its task. Both 200 and 404
# are accepted as outcomes.
epilogues:
  - path: /_plugins/_ml/models/{model_id}
    parameters:
      model_id: ${get_completed_task.model_id}
    method: DELETE
    status: [200, 404]
  - path: /_plugins/_ml/tasks/{task_id}
    parameters:
      task_id: ${register_model.task_id}
    method: DELETE
    status: [200, 404]
# One chapter per stats path; each expects HTTP 200. The node-scoped chapters
# reuse the `node_id` captured by the `get_completed_task` prologue.
chapters:
  - synopsis: Get all stats for all nodes.
    path: /_plugins/_ml/stats
    method: GET
    response:
      status: 200
  - synopsis: Get a specified stat for all nodes.
    path: /_plugins/_ml/stats/{stat}
    method: GET
    parameters:
      stat: ml_model_index_status
    response:
      status: 200
  - synopsis: Get all stats for a specific node.
    path: /_plugins/_ml/{node_id}/stats/
    method: GET
    parameters:
      node_id: ${get_completed_task.node_id}
    response:
      status: 200
  - synopsis: Get a specified stat for a specific node.
    path: /_plugins/_ml/{node_id}/stats/{stat}
    method: GET
    parameters:
      node_id: ${get_completed_task.node_id}
      stat: ml_task_index_status
    response:
      status: 200

0 comments on commit 362261b

Please sign in to comment.