Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

separate doc-level monitor query indices for externally defined monitors #1664

Merged
merged 4 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ class DocumentLevelMonitorRunner : MonitorRunner() {
// Clean up any queries created by the dry run monitor
monitorCtx.docLevelMonitorQueries!!.deleteDocLevelQueriesOnDryRun(monitorMetadata)
}

// TODO: Update the Document as part of the Trigger and return back the trigger action result
return monitorResult.copy(triggerResults = triggerResults, inputResults = inputRunResults)
} catch (e: Exception) {
Expand All @@ -387,6 +388,17 @@ class DocumentLevelMonitorRunner : MonitorRunner() {
)
return monitorResult.copy(error = alertingException, inputResults = InputRunResults(emptyList(), alertingException))
} finally {
if (monitor.deleteQueryIndexInEveryRun == true &&
monitorCtx.docLevelMonitorQueries!!.docLevelQueryIndexExists(monitor.dataSources)
) {
val ack = monitorCtx.docLevelMonitorQueries!!.deleteDocLevelQueryIndex(monitor.dataSources)
if (!ack) {
logger.error(
"Deletion of concrete queryIndex:${monitor.dataSources.queryIndex} is not ack'd! " +
"for monitor ${monitor.id}"
)
}
}
val endTime = System.currentTimeMillis()
totalTimeTakenStat = endTime - startTime
logger.debug(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,57 +95,69 @@ object DeleteMonitorService :

private suspend fun deleteDocLevelMonitorQueriesAndIndices(monitor: Monitor) {
try {
val metadata = MonitorMetadataService.getMetadata(monitor)
metadata?.sourceToQueryIndexMapping?.forEach { (_, queryIndex) ->
if (monitor.deleteQueryIndexInEveryRun == false) {
val metadata = MonitorMetadataService.getMetadata(monitor)
metadata?.sourceToQueryIndexMapping?.forEach { (_, queryIndex) ->

val indicesExistsResponse: IndicesExistsResponse =
client.suspendUntil {
client.admin().indices().exists(IndicesExistsRequest(queryIndex), it)
val indicesExistsResponse: IndicesExistsResponse =
client.suspendUntil {
client.admin().indices().exists(IndicesExistsRequest(queryIndex), it)
}
if (indicesExistsResponse.isExists == false) {
return
}
if (indicesExistsResponse.isExists == false) {
return
}
// Check if there's any queries from other monitors in this queryIndex,
// to avoid unnecessary doc deletion, if we could just delete index completely
val searchResponse: SearchResponse = client.suspendUntil {
search(
SearchRequest(queryIndex).source(
SearchSourceBuilder()
.size(0)
.query(
QueryBuilders.boolQuery().mustNot(
QueryBuilders.matchQuery("monitor_id", monitor.id)
// Check if there's any queries from other monitors in this queryIndex,
// to avoid unnecessary doc deletion, if we could just delete index completely
val searchResponse: SearchResponse = client.suspendUntil {
search(
SearchRequest(queryIndex).source(
SearchSourceBuilder()
.size(0)
.query(
QueryBuilders.boolQuery().mustNot(
QueryBuilders.matchQuery("monitor_id", monitor.id)
)
)
)
).indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_HIDDEN),
it
)
}
if (searchResponse.hits.totalHits.value == 0L) {
val ack: AcknowledgedResponse = client.suspendUntil {
client.admin().indices().delete(
DeleteIndexRequest(queryIndex).indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_HIDDEN),
).indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_HIDDEN),
it
)
}
if (ack.isAcknowledged == false) {
log.error("Deletion of concrete queryIndex:$queryIndex is not ack'd!")
}
} else {
// Delete all queries added by this monitor
val response: BulkByScrollResponse = suspendCoroutine { cont ->
DeleteByQueryRequestBuilder(client, DeleteByQueryAction.INSTANCE)
.source(queryIndex)
.filter(QueryBuilders.matchQuery("monitor_id", monitor.id))
.refresh(true)
.execute(
object : ActionListener<BulkByScrollResponse> {
override fun onResponse(response: BulkByScrollResponse) = cont.resume(response)
override fun onFailure(t: Exception) = cont.resumeWithException(t)
}
if (searchResponse.hits.totalHits.value == 0L) {
val ack: AcknowledgedResponse = client.suspendUntil {
client.admin().indices().delete(
DeleteIndexRequest(queryIndex).indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_HIDDEN),
it
)
}
if (ack.isAcknowledged == false) {
log.error("Deletion of concrete queryIndex:$queryIndex is not ack'd!")
}
} else {
// Delete all queries added by this monitor
val response: BulkByScrollResponse = suspendCoroutine { cont ->
DeleteByQueryRequestBuilder(client, DeleteByQueryAction.INSTANCE)
.source(queryIndex)
.filter(QueryBuilders.matchQuery("monitor_id", monitor.id))
.refresh(true)
.execute(
object : ActionListener<BulkByScrollResponse> {
override fun onResponse(response: BulkByScrollResponse) = cont.resume(response)
override fun onFailure(t: Exception) = cont.resumeWithException(t)
}
)
}
}
}
} else {
val ack: AcknowledgedResponse = client.suspendUntil {
client.admin().indices().delete(
DeleteIndexRequest(monitor.dataSources.queryIndex).indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_HIDDEN),
it
)
}
if (ack.isAcknowledged == false) {
log.error("Deletion of concrete queryIndex:${monitor.dataSources.queryIndex} is not ack'd!")
}
}
} catch (e: Exception) {
// we only log the error and don't fail the request because if monitor document has been deleted successfully,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,8 @@ class TransportIndexMonitorAction @Inject constructor(
if (
request.monitor.isMonitorOfStandardType() &&
Monitor.MonitorType.valueOf(request.monitor.monitorType.uppercase(Locale.ROOT)) ==
Monitor.MonitorType.DOC_LEVEL_MONITOR
Monitor.MonitorType.DOC_LEVEL_MONITOR &&
request.monitor.deleteQueryIndexInEveryRun == false
) {
indexDocLevelMonitorQueries(request.monitor, indexResponse.id, metadata, request.refreshPolicy)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/opensearch-project/alerting/pull/1664/files#diff-58c6e3aa339ebbcc7ee2452426f17164dc91d435c1f24ee55fdc4f06ec1a371eR546

here source to query mapping is updated but in case of new change query index doesnt even get created and wrong value would come up right?
this needs to be handled too

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the source to query mapping is dynamically updated during the first monitor run as shown in this logic. https://github.com/opensearch-project/alerting/blob/main/alerting/src/main/kotlin/org/opensearch/alerting/util/DocLevelMonitorQueries.kt#L481

Copy link
Member

@eirsep eirsep Sep 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this method being called for the monitors with dedicated queryindex also?
https://github.com/opensearch-project/alerting/blob/main/alerting/src/main/kotlin/org/opensearch/alerting/util/DocLevelMonitorQueries.kt#L473C25-L473C49

this code snippet will always throw exception right?

  targetQueryIndex = getWriteIndexNameForAlias(monitor.dataSources.queryIndex)
            if (targetQueryIndex == null) {
                val message = "Failed to get write index for queryIndex alias:${monitor.dataSources.queryIndex}"
                log.error(message)
                throw AlertingException.wrap(
                    OpenSearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR)
                )
            }

because the queryIndex is not an alias anymore?

Expand Down Expand Up @@ -702,13 +703,22 @@ class TransportIndexMonitorAction @Inject constructor(
Monitor.MonitorType.valueOf(currentMonitor.monitorType.uppercase(Locale.ROOT)) == Monitor.MonitorType.DOC_LEVEL_MONITOR
) {
updatedMetadata = MonitorMetadataService.recreateRunContext(metadata, currentMonitor)
client.suspendUntil<Client, BulkByScrollResponse> {
DeleteByQueryRequestBuilder(client, DeleteByQueryAction.INSTANCE)
.source(currentMonitor.dataSources.queryIndex)
.filter(QueryBuilders.matchQuery("monitor_id", currentMonitor.id))
.execute(it)
if (docLevelMonitorQueries.docLevelQueryIndexExists(currentMonitor.dataSources)) {
client.suspendUntil<Client, BulkByScrollResponse> {
DeleteByQueryRequestBuilder(client, DeleteByQueryAction.INSTANCE)
.source(currentMonitor.dataSources.queryIndex)
.filter(QueryBuilders.matchQuery("monitor_id", currentMonitor.id))
.execute(it)
}
}
if (currentMonitor.deleteQueryIndexInEveryRun == false) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


plz change it to error log

indexDocLevelMonitorQueries(
request.monitor,
currentMonitor.id,
updatedMetadata,
request.refreshPolicy
)
}
indexDocLevelMonitorQueries(request.monitor, currentMonitor.id, updatedMetadata, request.refreshPolicy)
MonitorMetadataService.upsertMetadata(updatedMetadata, updating = true)
}
actionListener.onResponse(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest
import org.opensearch.action.bulk.BulkRequest
import org.opensearch.action.bulk.BulkResponse
import org.opensearch.action.index.IndexRequest
import org.opensearch.action.support.IndicesOptions
import org.opensearch.action.support.WriteRequest.RefreshPolicy
import org.opensearch.action.support.master.AcknowledgedResponse
import org.opensearch.alerting.MonitorRunnerService.monitorCtx
Expand Down Expand Up @@ -181,6 +182,16 @@ class DocLevelMonitorQueries(private val client: Client, private val clusterServ
}
}

suspend fun deleteDocLevelQueryIndex(dataSources: DataSources): Boolean {
val ack: AcknowledgedResponse = client.suspendUntil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we do exists check before deleting

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed it.

client.admin().indices().delete(
DeleteIndexRequest(dataSources.queryIndex).indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_HIDDEN),
it
)
}
return ack.isAcknowledged
}

fun docLevelQueryIndexExists(dataSources: DataSources): Boolean {
val clusterState = clusterService.state()
return clusterState.metadata.hasAlias(dataSources.queryIndex)
Expand Down Expand Up @@ -434,6 +445,7 @@ class DocLevelMonitorQueries(private val client: Client, private val clusterServ
)
)
indexRequests.add(indexRequest)
log.debug("query $query added for execution of monitor $monitorId on index $sourceIndex")
}
log.debug("bulk inserting percolate [${queries.size}] queries")
if (indexRequests.isNotEmpty()) {
Expand Down Expand Up @@ -478,7 +490,12 @@ class DocLevelMonitorQueries(private val client: Client, private val clusterServ
updatedProperties: MutableMap<String, Any>
): Pair<AcknowledgedResponse, String> {
var targetQueryIndex = monitorMetadata.sourceToQueryIndexMapping[sourceIndex + monitor.id]
if (targetQueryIndex == null) {
if (
targetQueryIndex == null || (
targetQueryIndex != monitor.dataSources.queryIndex &&
monitor.deleteQueryIndexInEveryRun == true
)
) {
// queryIndex is alias which will always have only 1 backing index which is writeIndex
// This is due to a fact that that _rollover API would maintain only single index under alias
// if you don't add is_write_index setting when creating index initially
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1195,7 +1195,8 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() {
dataSources = DataSources(
queryIndex = customQueryIndex,
queryIndexMappingsByType = mapOf(Pair("text", mapOf(Pair("analyzer", analyzer)))),
)
),
owner = "alerting"
)
try {
createMonitor(monitor)
Expand Down Expand Up @@ -2379,7 +2380,9 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() {
val trigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN)
var monitor = randomDocumentLevelMonitor(
inputs = listOf(docLevelInput),
triggers = listOf(trigger)
triggers = listOf(trigger),
dataSources = DataSources(),
owner = "alerting"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plz add 2 new tests-

  1. scenario where delete query index flag is false and monitor execution fails due to mapping conflict on alias rollover
  2. scenario where delete query index flag is true and monitor execution succeeds

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in first test plz also update monitor with flag false to true and do rollover again and this time assert monitor execution succeeeds

)
// This doc should create close to 10000 (limit) fields in index mapping. It's easier to add mappings like this then via api
val docPayload: StringBuilder = StringBuilder(100000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ class AlertingBackwardsCompatibilityIT : AlertingRestTestCase() {
val indexName = "test_bwc_index"
val bwcMonitorString = """
{
"owner": "alerting",
"type": "monitor",
"name": "test_bwc_monitor",
"enabled": true,
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/resources/mappings/scheduled-jobs.json
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,9 @@
}
}
},
"delete_query_index_in_every_run": {
"type": "boolean"
},
"ui_metadata": {
"type": "object",
"enabled": false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient
"id", null)), trigger1Serialized)),
Map.of(),
new DataSources(),
true,
"sample-remote-monitor-plugin"
);
IndexMonitorRequest indexMonitorRequest1 = new IndexMonitorRequest(
Expand Down Expand Up @@ -154,6 +155,7 @@ public void onFailure(Exception e) {
List.of(),
Map.of(),
new DataSources(),
true,
"sample-remote-monitor-plugin"
);
IndexMonitorRequest indexMonitorRequest2 = new IndexMonitorRequest(
Expand Down Expand Up @@ -237,6 +239,7 @@ public void onFailure(Exception e) {
"id", null)), trigger1Serialized)),
Map.of(),
new DataSources(),
true,
"sample-remote-monitor-plugin"
);
IndexMonitorRequest indexDocLevelMonitorRequest = new IndexMonitorRequest(
Expand Down
Loading