Commit
Support OpenShift Logging 6.x
DebakelOrakel committed Dec 21, 2024
1 parent 8710667 commit c6d5280
Showing 70 changed files with 432 additions and 3,025 deletions.
18 changes: 4 additions & 14 deletions alerts.txt
@@ -1,17 +1,7 @@
-https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-5.6/internal/metrics/alerts/fluentd.go.FluentdPrometheusAlert release-5.6/fluentd_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-5.7/internal/metrics/alerts/fluentd.go.FluentdPrometheusAlert release-5.7/fluentd_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-5.8/config/prometheus/collector_alerts.yaml release-5.8/collector_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-5.9/config/prometheus/collector_alerts.yaml release-5.9/collector_prometheus_alerts.yaml
+https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-6.0/config/prometheus/collector_alerts.yaml release-6.0/collector_prometheus_alerts.yaml
+https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-6.1/config/prometheus/collector_alerts.yaml release-6.1/collector_prometheus_alerts.yaml
 https://raw.githubusercontent.com/openshift/cluster-logging-operator/master/config/prometheus/collector_alerts.yaml master/collector_prometheus_alerts.yaml
 
-https://raw.githubusercontent.com/openshift/elasticsearch-operator/release-5.6/files/prometheus_alerts.yml release-5.6/elasticsearch_operator_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/elasticsearch-operator/release-5.7/files/prometheus_alerts.yml release-5.7/elasticsearch_operator_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/elasticsearch-operator/release-5.8/files/prometheus_alerts.yml release-5.8/elasticsearch_operator_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/elasticsearch-operator/release-5.8/files/prometheus_alerts.yml release-5.9/elasticsearch_operator_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/elasticsearch-operator/master/files/prometheus_alerts.yml master/elasticsearch_operator_prometheus_alerts.yaml
-
-https://raw.githubusercontent.com/openshift/loki/release-5.6/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-5.6/lokistack_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/loki/release-5.7/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-5.7/lokistack_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/loki/release-5.8/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-5.8/lokistack_prometheus_alerts.yaml
-https://raw.githubusercontent.com/openshift/loki/release-5.9/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-5.9/lokistack_prometheus_alerts.yaml
+https://raw.githubusercontent.com/openshift/loki/release-6.0/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-6.0/lokistack_prometheus_alerts.yaml
+https://raw.githubusercontent.com/openshift/loki/release-6.1/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-6.1/lokistack_prometheus_alerts.yaml
 https://raw.githubusercontent.com/openshift/loki/main/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml master/lokistack_prometheus_alerts.yaml
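Each entry in alerts.txt pairs an upstream source URL with the local path the rules are extracted to under component/extracted_alerts/. For orientation, the collector files are PrometheusRule manifests (the component reads them via loadFile(...)[0].spec.groups further down); a rough sketch of their shape, where the rule name and expression are illustrative only and vary by release:

apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: collector
spec:
  groups:
    - name: logging_collector.alerts
      rules:
        - alert: CollectorNodeDown
          # illustrative expression; see the upstream collector_alerts.yaml per release
          expr: up{app_kubernetes_io_component="collector"} == 0
          for: 10m
          labels:
            severity: critical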
48 changes: 13 additions & 35 deletions class/defaults.yml
@@ -8,9 +8,9 @@ parameters:
       "False": {}
 
     namespace: openshift-logging
-    version: '5.9'
+
+    version: '6.0'
     channel: 'stable-${openshift4_logging:version}'
-    alerts: 'release-${openshift4_logging:version}'
 
     components:
       lokistack:
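After Reclass interpolation of the updated defaults, the operator subscription targets the 6.0 channel. A sketch of the resolved values, assuming the standard Commodore/Reclass reference resolution:

parameters:
  openshift4_logging:
    version: '6.0'
    channel: 'stable-6.0'  # resolved from 'stable-${openshift4_logging:version}'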
@@ -42,16 +42,6 @@ parameters:
               ingestion:
                 ingestionBurstSize: 9
                 ingestionRate: 5
-      elasticsearch:
-        enabled: false
-        kibana_host: null
-        predict_elasticsearch_storage_alert:
-          enabled: true
-          lookback_range: 72h
-          predict_hours_from_now: 72
-          threshold: 85
-          for: 6h
-          severity: warning
       logmetrics:
         enabled: false
         spec:
@@ -65,14 +55,20 @@ parameters:
            cpu: 200m
            memory: 128Mi
 
-    clusterLogging: {}
     clusterLogForwarder: {}
 
-    namespaceLogForwarderEnabled: false
-    namespaceLogForwarder: {}
-
     secrets: {}
 
+    alerts:
+      release: 'release-${openshift4_logging:version}'
+      ignore:
+        - ElasticsearchHighFileDescriptorUsage
+        - ElasticsearchOperatorCSVNotSuccessful
+        - FluentdQueueLengthIncreasing
+      patch:
+        FluentdQueueLengthIncreasing:
+          for: '12h'
+
     operatorResources:
       clusterLogging:
        requests:
@@ -86,12 +82,6 @@ parameters:
          cpu: 50m
        limits:
          memory: 512Mi
-      elasticsearch:
-        requests:
-          memory: 1Gi
-          cpu: 100m
-        limits:
-          memory: 1.5Gi
 
     images:
       kubectl:
@@ -104,17 +94,5 @@ parameters:
      schedule: '*/10 * * * *'
      sleep_time: 2m
 
-    ignore_alerts:
-      - ElasticsearchHighFileDescriptorUsage
-      - ElasticsearchOperatorCSVNotSuccessful
-      - FluentdQueueLengthIncreasing
-
-    patch_alerts:
-      FluentdQueueLengthIncreasing:
-        for: '12h'
-
-  openshift4_elasticsearch_operator:
-    targetNamespaces:
-      - ${openshift4_logging:namespace}
-
   openshift4_console: ${openshift4_logging:_openshift4_console:${openshift4_logging:components:lokistack:enabled}}
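With the top-level ignore_alerts and patch_alerts keys removed, overrides move under the new nested alerts parameter. A minimal sketch of a downstream hierarchy override in the new layout; the alert names and values here are purely illustrative:

parameters:
  openshift4_logging:
    alerts:
      ignore:
        - CollectorNodeDown
      patch:
        CollectorHighErrorRate:
          for: '30m'
          labels:
            severity: warning

The old flat keys keep working for now: as the alertrules.libsonnet changes below show, ignore_alerts and patch_alerts are still read via std.get and merged with the new nested values.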

73 changes: 8 additions & 65 deletions component/alertrules.libsonnet
@@ -2,12 +2,10 @@ local alertpatching = import 'lib/alert-patching.libsonnet';
 local com = import 'lib/commodore.libjsonnet';
 local kap = import 'lib/kapitan.libjsonnet';
 local kube = import 'lib/kube.libjsonnet';
-local utils = import 'utils.libsonnet';
 
 local inv = kap.inventory();
 local params = inv.parameters.openshift4_logging;
-local elasticsearch = inv.parameters.openshift4_logging.components.elasticsearch;
-local loki = inv.parameters.openshift4_logging.components.lokistack;
+local lokiEnabled = params.components.lokistack.enabled;
 
 
 local runbook(alertname) = 'https://hub.syn.tools/openshift4-logging/runbooks/%s.html' % alertname;
@@ -16,63 +14,26 @@ assert
   std.member(inv.applications, 'openshift4-monitoring')
   : 'Component `openshift4-monitoring` not enabled';
 
-// Keep config backwards compatible
-local predict_storage_alert = elasticsearch.predict_elasticsearch_storage_alert + (
-  if std.objectHas(params, 'predict_elasticsearch_storage_alert') then
-    std.trace(
-      'parameter predict_elasticsearch_storage_alert is deprecated, please use parameter `components.elasticsearch.predict_elasticsearch_storage_alert instead`',
-      com.makeMergeable(params.predict_elasticsearch_storage_alert)
-    )
-  else {}
-);
-
-// Upstream alerts to ignore
-// Keep only alerts from params.ignore_alerts for which the last
-// array entry wasn't prefixed with `~`.
-local user_ignore_alerts = com.renderArray(params.ignore_alerts);
-
 // Upstream alerts to ignore
 local ignore_alerts = std.set(
   // Add set of upstream alerts that should be ignored from processed value of
   // `params.ignore_alerts`
-  user_ignore_alerts
+  com.renderArray(params.alerts.ignore)
+  + com.renderArray(std.get(params, 'ignore_alerts', []))
 );
 
 // Alert rule patches.
 // Provide partial objects for alert rules that need to be tuned compared to
 // upstream. The keys in this object correspond to the `alert` field of the
 // rule for which the patch is intended.
-local patch_alerts = params.patch_alerts;
+local patch_alerts = params.alerts.patch + std.get(params, 'patch_alerts', {});
 
 local loadFile(file) =
-  local fpath = 'openshift4-logging/component/extracted_alerts/%s/%s' % [ params.alerts, file ];
+  local fpath = 'openshift4-logging/component/extracted_alerts/%s/%s' % [ params.alerts.release, file ];
   std.parseJson(kap.yaml_load_stream(fpath));
 
 
-// This will be processed by filter_patch_rules() as well
-local predictESStorage = {
-  local alertName = 'ElasticsearchExpectNodeToReachDiskWatermark',
-  local hoursFromNow = predict_storage_alert.predict_hours_from_now,
-  local secondsFromNow = hoursFromNow * 3600,
-  alert: alertName,
-  annotations: {
-    message: (
-      'Expecting to reach disk low watermark at {{ $labels.node }} node in {{ $labels.cluster }} cluster in %s hours.'
-      + ' When reaching the watermark no new shards will be allocated to this node anymore. You should consider adding more disk to the node.'
-    ) % std.toString(hoursFromNow),
-    runbook_url: runbook('SYN_' + alertName),
-    summary: 'Expecting to Reach Disk Low Watermark in %s Hours' % std.toString(hoursFromNow),
-  },
-  expr: |||
-    sum by(cluster, instance, node) (
-      (1 - (predict_linear(es_fs_path_available_bytes[%s], %s) / es_fs_path_total_bytes)) * 100
-    ) > %s
-  ||| % [ predict_storage_alert.lookback_range, std.toString(secondsFromNow), std.toString(predict_storage_alert.threshold) ],
-  'for': predict_storage_alert['for'],
-  labels: {
-    severity: predict_storage_alert.severity,
-  },
-};
-
 local renderRunbookBaseURL(group, baseURL) = {
   name: group.name,
   rules: std.map(
@@ -119,23 +80,6 @@ local prometheus_rules(name, groups, baseURL) = kube._Object('monitoring.coreos.
   },
 };
 
-
-// Elasticstack alerts
-
-local esStorageGroup = {
-  name: 'elasticsearch_node_storage.alerts',
-  rules: [ predictESStorage ],
-};
-local fluentdGroup = if !utils.isVersion58 then loadFile('fluentd_prometheus_alerts.yaml')[0].groups else [];
-
-local esGroups =
-  loadFile('elasticsearch_operator_prometheus_alerts.yaml')[0].groups +
-  fluentdGroup +
-  [
-    if predict_storage_alert.enabled then esStorageGroup,
-  ];
-local esBaseURL = 'https://github.com/openshift/elasticsearch-operator/blob/master/docs/alerts.md';
-
 // Lokistack alerts
 
 local lokiGroups = loadFile('lokistack_prometheus_alerts.yaml')[0].groups;
@@ -146,7 +90,6 @@ local lokiBaseURL = 'https://github.com/grafana/loki/blob/main/operator/docs/lok
 local collectorGroups = loadFile('collector_prometheus_alerts.yaml')[0].spec.groups;
 
 {
-  [if elasticsearch.enabled then '60_elasticsearch_alerts']: prometheus_rules('syn-elasticsearch-logging-rules', esGroups, esBaseURL),
-  [if loki.enabled then '60_lokistack_alerts']: prometheus_rules('syn-loki-logging-rules', lokiGroups, lokiBaseURL),
-  [if utils.isVersion58 then '60_collector_alerts']: prometheus_rules('syn-collector-rules', collectorGroups, ''),
+  [if lokiEnabled then '60_lokistack_alerts']: prometheus_rules('syn-loki-logging-rules', lokiGroups, lokiBaseURL),
+  '60_collector_alerts': prometheus_rules('syn-collector-rules', collectorGroups, ''),
 }
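With the default release now 6.0, loadFile() resolves to paths such as openshift4-logging/component/extracted_alerts/release-6.0/collector_prometheus_alerts.yaml, and the output drops the Elasticsearch rules while making the collector rules unconditional. A rough sketch of the rendered syn-collector-rules object, assuming the SYN_ alert prefix and syn label applied by the shared alert-patching library; the rule content itself is illustrative:

apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: syn-collector-rules
  namespace: openshift-logging
spec:
  groups:
    - name: logging_collector.alerts
      rules:
        - alert: SYN_CollectorNodeDown
          annotations:
            runbook_url: https://hub.syn.tools/openshift4-logging/runbooks/SYN_CollectorNodeDown.html
          # illustrative expression; actual rules come from the extracted upstream file
          expr: up{app_kubernetes_io_component="collector"} == 0
          for: 10m
          labels:
            severity: critical
            syn: 'true'  # assumed; added by the alert-patching library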
12 changes: 9 additions & 3 deletions component/app.jsonnet
@@ -3,8 +3,14 @@ local inv = kap.inventory();
 local params = inv.parameters.openshift4_logging;
 local argocd = import 'lib/argocd.libjsonnet';
 
-local app = argocd.App('openshift4-logging', params.namespace);
-
 {
-  'openshift4-logging': app,
+  'openshift4-logging': argocd.App('openshift4-logging', params.namespace) {
+    spec+: {
+      syncPolicy+: {
+        syncOptions+: [
+          'ServerSideApply=true',
+        ],
+      },
+    },
+  },
 }
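Rendered, the Application now tells Argo CD to use server-side apply, which avoids the size limit on the last-applied-configuration annotation that client-side apply can hit with large resources; the sizeable logging CRDs are a plausible motivation, though the commit does not state one. A sketch of the relevant part of the resulting manifest, assuming the usual shape produced by the shared argocd.App helper and omitting source, destination, and project:

apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: openshift4-logging
spec:
  syncPolicy:
    syncOptions:
      - ServerSideApply=true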