From d7041cbbf3bee83047a8ed7fadcd5945a7235940 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Mon, 18 Nov 2024 15:38:57 +0530 Subject: [PATCH 1/2] Add disk usage prometheus metrics to jupyterhub-home-nfs The disk usage prometheus metrics are exported by a sidecar container running prometheus node-exporter. --- .../templates/deployment.yaml | 21 +++++++++++++++++++ helm/jupyterhub-home-nfs/values.yaml | 19 +++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/helm/jupyterhub-home-nfs/templates/deployment.yaml b/helm/jupyterhub-home-nfs/templates/deployment.yaml index 0d54440..a9dc84a 100644 --- a/helm/jupyterhub-home-nfs/templates/deployment.yaml +++ b/helm/jupyterhub-home-nfs/templates/deployment.yaml @@ -11,6 +11,11 @@ spec: metadata: labels: app: nfs-server + annotations: + {{- if .Values.prometheusExporter.enabled }} + prometheus.io/scrape: "true" + prometheus.io/port: "9100" + {{- end }} spec: containers: - name: nfs-server @@ -35,6 +40,22 @@ spec: volumeMounts: - name: home-directories mountPath: /export + {{- if .Values.prometheusExporter.enabled }} + - name: metrics-exporter + image: "{{ .Values.prometheusExporter.image.repository }}:{{ .Values.prometheusExporter.image.tag }}" + args: + - --collector.disable-defaults + - --path.rootfs=/export + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) + - --collector.filesystem + ports: + - name: metrics + containerPort: 9100 + volumeMounts: + - name: home-directories + mountPath: /export + readOnly: true + {{- end }} volumes: - name: home-directories persistentVolumeClaim: diff --git a/helm/jupyterhub-home-nfs/values.yaml b/helm/jupyterhub-home-nfs/values.yaml index 499b6f4..6081ec2 100644 --- a/helm/jupyterhub-home-nfs/values.yaml +++ b/helm/jupyterhub-home-nfs/values.yaml @@ -1,8 +1,14 @@ +# NFS Ganesha configuration +# NFS Ganesha is the user space NFS server that we use to serve the home directories + nfsServer: image: repository: ghcr.io/2i2c-org/nfs-ganesha tag: 0.0.6 +# Quota enforcer configuration +# This container enforces the quota on the home directories + quotaEnforcer: image: repository: ghcr.io/2i2c-org/get-quota-your-home @@ -11,7 +17,20 @@ quotaEnforcer: # quota in GB hardQuota: "10" +# Prometheus exporter configuration +# We export disk usage metrics using the Prometheus node exporter + +prometheusExporter: + enabled: true + image: + repository: quay.io/prometheus/node-exporter + tag: v1.8.2 + +# Persistent volume configuration + persistentVolume: + # The size should match the pre-provisioned disk size + # The pre-provisioned disk ID is specified below in the cloud provider specific configuration size: 10Gi storageClass: "" accessModes: From 79b26639a338139aea0fe49c763d9b972d5f6077 Mon Sep 17 00:00:00 2001 From: Tarashish Mishra Date: Fri, 22 Nov 2024 13:27:28 +0530 Subject: [PATCH 2/2] Update metric exporter labels and mount path The dashboard definition in jupyterhub/grafana-dashboards expects the NFS home directory disk to be mounted at /shared-volume and the deployment to have the label `component: shared-volume-metrics` --- helm/jupyterhub-home-nfs/templates/deployment.yaml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/helm/jupyterhub-home-nfs/templates/deployment.yaml b/helm/jupyterhub-home-nfs/templates/deployment.yaml index a9dc84a..d9e8781 100644 --- a/helm/jupyterhub-home-nfs/templates/deployment.yaml +++ b/helm/jupyterhub-home-nfs/templates/deployment.yaml @@ -11,6 +11,10 @@ spec: metadata: labels: app: nfs-server + # The component label is used by the shared volume free space panel + # in jupyterhub/grafana-dashboards + # https://github.com/jupyterhub/grafana-dashboards/blob/18ba92d98cd297951673850a4c92507479ec4ca2/dashboards/jupyterhub.jsonnet#L322 + component: shared-volume-metrics annotations: {{- if .Values.prometheusExporter.enabled }} prometheus.io/scrape: "true" @@ -44,16 +48,18 @@ spec: - name: metrics-exporter image: "{{ .Values.prometheusExporter.image.repository }}:{{ .Values.prometheusExporter.image.tag }}" args: + # Disable default collectors; we only want filesystem metrics - --collector.disable-defaults - - --path.rootfs=/export - - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) - --collector.filesystem + # Exclude some patterns of mount points to avoid collecting unnecessary metrics + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|etc|var/run/.+|var/lib/docker/.+|var/lib/kubelet/.+)($|/) ports: - name: metrics containerPort: 9100 volumeMounts: - name: home-directories - mountPath: /export + # Mounting under /shared-volume to match path in dashboard definition in jupyterhub/grafana-dashboards + mountPath: /shared-volume readOnly: true {{- end }} volumes: