From b0aa6fa8bbb2333e6be6a4bbe5b4dafac84724c9 Mon Sep 17 00:00:00 2001
From: Antoine Gaillard <antoine.gaillard@datadoghq.com>
Date: Wed, 21 Aug 2024 10:30:23 +0200
Subject: [PATCH 1/2] Add helper to match taint by key only

---
 staging/src/k8s.io/api/core/v1/taint.go      |  5 ++
 staging/src/k8s.io/api/core/v1/taint_test.go | 68 ++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/staging/src/k8s.io/api/core/v1/taint.go b/staging/src/k8s.io/api/core/v1/taint.go
index db71bd2fd0b60..e9c5f9134427d 100644
--- a/staging/src/k8s.io/api/core/v1/taint.go
+++ b/staging/src/k8s.io/api/core/v1/taint.go
@@ -24,6 +24,11 @@ func (t *Taint) MatchTaint(taintToMatch *Taint) bool {
 	return t.Key == taintToMatch.Key && t.Effect == taintToMatch.Effect
 }
 
+// MatchTaintByKey checks if the taint key matches taintToMatch key
+func (t *Taint) MatchTaintByKey(taintToMatch *Taint) bool {
+	return t.Key == taintToMatch.Key
+}
+
 // taint.ToString() converts taint struct to string in format '<key>=<value>:<effect>', '<key>=<value>:', '<key>:<effect>', or '<key>'.
 func (t *Taint) ToString() string {
 	if len(t.Effect) == 0 {
diff --git a/staging/src/k8s.io/api/core/v1/taint_test.go b/staging/src/k8s.io/api/core/v1/taint_test.go
index 6a41718298473..1c86013550952 100644
--- a/staging/src/k8s.io/api/core/v1/taint_test.go
+++ b/staging/src/k8s.io/api/core/v1/taint_test.go
@@ -133,3 +133,71 @@ func TestMatchTaint(t *testing.T) {
 		}
 	}
 }
+
+func TestMatchTaintByKey(t *testing.T) {
+	testCases := []struct {
+		description  string
+		taint        *Taint
+		taintToMatch Taint
+		expectMatch  bool
+	}{
+		{
+			description: "two taints with the same key should match",
+			taint: &Taint{
+				Key: "foo",
+			},
+			taintToMatch: Taint{
+				Key: "foo",
+			},
+			expectMatch: true,
+		},
+		{
+			description: "two taints with the same key but different value should match",
+			taint: &Taint{
+				Key:    "foo",
+				Value:  "bar",
+				Effect: TaintEffectNoSchedule,
+			},
+			taintToMatch: Taint{
+				Key:    "foo",
+				Value:  "different-value",
+				Effect: TaintEffectNoSchedule,
+			},
+			expectMatch: true,
+		},
+		{
+			description: "two taints with the different key cannot match",
+			taint: &Taint{
+				Key:    "foo",
+				Value:  "bar",
+				Effect: TaintEffectNoSchedule,
+			},
+			taintToMatch: Taint{
+				Key:    "different-key",
+				Value:  "bar",
+				Effect: TaintEffectNoSchedule,
+			},
+			expectMatch: false,
+		},
+		{
+			description: "two taints with the same key but different effect should match",
+			taint: &Taint{
+				Key:    "foo",
+				Value:  "bar",
+				Effect: TaintEffectNoSchedule,
+			},
+			taintToMatch: Taint{
+				Key:    "foo",
+				Value:  "bar",
+				Effect: TaintEffectPreferNoSchedule,
+			},
+			expectMatch: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		if tc.expectMatch != tc.taint.MatchTaintByKey(&tc.taintToMatch) {
+			t.Errorf("[%s] expect taint %s match taint %s", tc.description, tc.taint.ToString(), tc.taintToMatch.ToString())
+		}
+	}
+}

From 8ba44e6ac8b8030746bf99e999a6c5f60e7a3a61 Mon Sep 17 00:00:00 2001
From: Antoine Gaillard <antoine.gaillard@datadoghq.com>
Date: Wed, 21 Aug 2024 10:32:33 +0200
Subject: [PATCH 2/2] Add helper to remove not-ready taint

---
 .../cloud-provider/node/helpers/taints.go     |  79 ++++++++--
 .../node/helpers/taints_test.go               | 149 +++++++++++++++++-
 2 files changed, 206 insertions(+), 22 deletions(-)

diff --git a/staging/src/k8s.io/cloud-provider/node/helpers/taints.go b/staging/src/k8s.io/cloud-provider/node/helpers/taints.go
index fb15d64bd24a5..cf260b5c9ad7f 100644
--- a/staging/src/k8s.io/cloud-provider/node/helpers/taints.go
+++ b/staging/src/k8s.io/cloud-provider/node/helpers/taints.go
@@ -28,6 +28,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"k8s.io/klog/v2"
 	"time"
 
 	"k8s.io/api/core/v1"
@@ -145,11 +146,7 @@ func addOrUpdateTaint(node *v1.Node, taint *v1.Taint) (*v1.Node, bool, error) {
 	return newNode, true, nil
 }
 
-// RemoveTaintOffNode is for cleaning up taints temporarily added to node,
-// won't fail if target taint doesn't exist or has been removed.
-// If passed a node it'll check if there's anything to be done, if taint is not present it won't issue
-// any API calls.
-func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, taints ...*v1.Taint) error {
+func removeTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, fn matchTaintFunc, taints ...*v1.Taint) error {
 	if len(taints) == 0 {
 		return nil
 	}
@@ -157,7 +154,7 @@ func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, t
 	if node != nil {
 		match := false
 		for _, taint := range taints {
-			if taintExists(node.Spec.Taints, taint) {
+			if taintExists(node.Spec.Taints, taint, fn) {
 				match = true
 				break
 			}
@@ -187,7 +184,7 @@ func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, t
 		oldNodeCopy := oldNode
 		updated := false
 		for _, taint := range taints {
-			curNewNode, ok, err := removeTaint(oldNodeCopy, taint)
+			curNewNode, ok, err := removeMatchingTaint(oldNodeCopy, taint, fn)
 			if err != nil {
 				return fmt.Errorf("failed to remove taint of node")
 			}
@@ -202,40 +199,58 @@ func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, t
 	})
 }
 
-// taintExists checks if the given taint exists in list of taints. Returns true if exists false otherwise.
-func taintExists(taints []v1.Taint, taintToFind *v1.Taint) bool {
+// RemoveTaintOffNode is for cleaning up taints temporarily added to node,
+// won't fail if target taint doesn't exist or has been removed.
+// If passed a node it'll check if there's anything to be done, if taint is not present it won't issue
+// any API calls.
+func RemoveTaintOffNode(c clientset.Interface, nodeName string, node *v1.Node, taints ...*v1.Taint) error {
+	return removeTaintOffNode(c, nodeName, node, (*v1.Taint).MatchTaint, taints...)
+}
+
+// RemoveTaintOffNodeByKey is for cleaning up taints temporarily added to node matching by key only,
+// won't fail if target taint doesn't exist or has been removed.
+// If passed a node it'll check if there's anything to be done, if taint is not present it won't issue
+// any API calls.
+func RemoveTaintOffNodeByKey(c clientset.Interface, nodeName string, node *v1.Node, taints ...*v1.Taint) error {
+	return removeTaintOffNode(c, nodeName, node, (*v1.Taint).MatchTaintByKey, taints...)
+}
+
+type matchTaintFunc func(taint *v1.Taint, taintToMatch *v1.Taint) bool
+
+// taintExists checks if matchFn evaluates to true on the list of taints. Returns true if there's a match, false otherwise
+func taintExists(taints []v1.Taint, taintToFind *v1.Taint, matchFn matchTaintFunc) bool {
 	for _, taint := range taints {
-		if taint.MatchTaint(taintToFind) {
+		if matchFn(&taint, taintToFind) {
 			return true
 		}
 	}
 	return false
 }
 
-// removeTaint tries to remove a taint from annotations list. Returns a new copy of updated Node and true if something was updated
+// removeTaint tries to remove a taint from annotations list that satisfies matchFn predicate. Returns a new copy of updated Node and true if something was updated
 // false otherwise.
-func removeTaint(node *v1.Node, taint *v1.Taint) (*v1.Node, bool, error) {
+func removeMatchingTaint(node *v1.Node, taint *v1.Taint, matchFn matchTaintFunc) (*v1.Node, bool, error) {
 	newNode := node.DeepCopy()
 	nodeTaints := newNode.Spec.Taints
 	if len(nodeTaints) == 0 {
 		return newNode, false, nil
 	}
 
-	if !taintExists(nodeTaints, taint) {
+	if !taintExists(nodeTaints, taint, matchFn) {
 		return newNode, false, nil
 	}
 
-	newTaints, _ := deleteTaint(nodeTaints, taint)
+	newTaints, _ := deleteMatchingTaint(nodeTaints, taint, matchFn)
 	newNode.Spec.Taints = newTaints
 	return newNode, true, nil
 }
 
-// deleteTaint removes all the taints that have the same key and effect to given taintToDelete.
-func deleteTaint(taints []v1.Taint, taintToDelete *v1.Taint) ([]v1.Taint, bool) {
+// deleteTaint removes all the taints that match using matchFn
+func deleteMatchingTaint(taints []v1.Taint, taintToDelete *v1.Taint, matchFn matchTaintFunc) ([]v1.Taint, bool) {
 	newTaints := []v1.Taint{}
 	deleted := false
 	for i := range taints {
-		if taintToDelete.MatchTaint(&taints[i]) {
+		if matchFn(taintToDelete, &taints[i]) {
 			deleted = true
 			continue
 		}
@@ -243,3 +258,33 @@ func deleteTaint(taints []v1.Taint, taintToDelete *v1.Taint) ([]v1.Taint, bool)
 	}
 	return newTaints, deleted
 }
+
+type ComponentReadyFunc func(ctx context.Context, c clientset.Interface, nodeName, driverName string) error
+
+// RemoveNotReadyTaint removes the taint componentName/agent-not-ready from the local node
+// This taint can be optionally applied by users to prevent startup race conditions such as
+// https://github.com/kubernetes/kubernetes/issues/95911
+func RemoveNotReadyTaint(c clientset.Interface, nodeName, componentName string, readyFn ComponentReadyFunc) error {
+	ctx := context.Background()
+	node, err := c.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+
+	if err := readyFn(ctx, c, nodeName, componentName); err != nil {
+		return err
+	}
+
+	const AgentNotReadyNodeTaintKeySuffix = "/agent-not-ready"
+
+	taintKeyToRemove := componentName + AgentNotReadyNodeTaintKeySuffix
+	klog.V(2).Infof("removing taint with key %s from local node %s", taintKeyToRemove, nodeName)
+
+	err = RemoveTaintOffNodeByKey(c, nodeName, node, &v1.Taint{Key: taintKeyToRemove})
+	if err != nil {
+		return err
+	}
+
+	klog.V(2).Infof("removed taint with key %s from local node %s successfully", taintKeyToRemove, nodeName)
+	return nil
+}
diff --git a/staging/src/k8s.io/cloud-provider/node/helpers/taints_test.go b/staging/src/k8s.io/cloud-provider/node/helpers/taints_test.go
index 859b517e1754e..815920ad852e9 100644
--- a/staging/src/k8s.io/cloud-provider/node/helpers/taints_test.go
+++ b/staging/src/k8s.io/cloud-provider/node/helpers/taints_test.go
@@ -17,10 +17,17 @@ limitations under the License.
 package helpers
 
 import (
+	"context"
+	"errors"
+	"github.com/stretchr/testify/assert"
+	"k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/kubernetes/fake"
+	"k8s.io/kubernetes/test/utils/ktesting"
 	"reflect"
 	"testing"
-
-	"k8s.io/api/core/v1"
 )
 
 func TestTaintExists(t *testing.T) {
@@ -40,27 +47,49 @@ func TestTaintExists(t *testing.T) {
 	cases := []struct {
 		name           string
 		taintToFind    *v1.Taint
+		matchingFn     matchTaintFunc
 		expectedResult bool
 	}{
 		{
 			name:           "taint exists",
 			taintToFind:    &v1.Taint{Key: "foo_1", Value: "bar_1", Effect: v1.TaintEffectNoExecute},
+			matchingFn:     (*v1.Taint).MatchTaint,
 			expectedResult: true,
 		},
 		{
 			name:           "different key",
 			taintToFind:    &v1.Taint{Key: "no_such_key", Value: "bar_1", Effect: v1.TaintEffectNoExecute},
+			matchingFn:     (*v1.Taint).MatchTaint,
 			expectedResult: false,
 		},
 		{
 			name:           "different effect",
 			taintToFind:    &v1.Taint{Key: "foo_1", Value: "bar_1", Effect: v1.TaintEffectNoSchedule},
+			matchingFn:     (*v1.Taint).MatchTaint,
+			expectedResult: false,
+		},
+		{
+			name:           "same key, match by key",
+			taintToFind:    &v1.Taint{Key: "foo_1", Value: "bar_1", Effect: v1.TaintEffectNoSchedule},
+			matchingFn:     (*v1.Taint).MatchTaintByKey,
+			expectedResult: true,
+		},
+		{
+			name:           "different effect, match by key",
+			taintToFind:    &v1.Taint{Key: "foo_1", Value: "bar_1", Effect: v1.TaintEffectNoSchedule},
+			matchingFn:     (*v1.Taint).MatchTaintByKey,
+			expectedResult: true,
+		},
+		{
+			name:           "different key, match by key",
+			taintToFind:    &v1.Taint{Key: "no_such_key", Value: "bar_1", Effect: v1.TaintEffectNoSchedule},
+			matchingFn:     (*v1.Taint).MatchTaintByKey,
 			expectedResult: false,
 		},
 	}
 
 	for _, c := range cases {
-		result := taintExists(testingTaints, c.taintToFind)
+		result := taintExists(testingTaints, c.taintToFind, c.matchingFn)
 
 		if result != c.expectedResult {
 			t.Errorf("[%s] unexpected results: %v", c.name, result)
@@ -74,6 +103,7 @@ func TestRemoveTaint(t *testing.T) {
 		name           string
 		node           *v1.Node
 		taintToRemove  *v1.Taint
+		matchingFn     matchTaintFunc
 		expectedTaints []v1.Taint
 		expectedResult bool
 	}{
@@ -93,6 +123,7 @@ func TestRemoveTaint(t *testing.T) {
 				Key:    "foo_1",
 				Effect: v1.TaintEffectNoSchedule,
 			},
+			matchingFn: (*v1.Taint).MatchTaint,
 			expectedTaints: []v1.Taint{
 				{
 					Key:    "foo",
@@ -117,6 +148,7 @@ func TestRemoveTaint(t *testing.T) {
 				Key:    "foo",
 				Effect: v1.TaintEffectNoSchedule,
 			},
+			matchingFn:     (*v1.Taint).MatchTaint,
 			expectedTaints: []v1.Taint{},
 			expectedResult: true,
 		},
@@ -131,13 +163,14 @@ func TestRemoveTaint(t *testing.T) {
 				Key:    "foo",
 				Effect: v1.TaintEffectNoSchedule,
 			},
+			matchingFn:     (*v1.Taint).MatchTaint,
 			expectedTaints: []v1.Taint{},
 			expectedResult: false,
 		},
 	}
 
 	for _, c := range cases {
-		newNode, result, err := removeTaint(c.node, c.taintToRemove)
+		newNode, result, err := removeMatchingTaint(c.node, c.taintToRemove, c.matchingFn)
 		if err != nil {
 			t.Errorf("[%s] should not raise error but got: %v", c.name, err)
 		}
@@ -155,6 +188,7 @@ func TestDeleteTaint(t *testing.T) {
 		name           string
 		taints         []v1.Taint
 		taintToDelete  *v1.Taint
+		matchingFn     matchTaintFunc
 		expectedTaints []v1.Taint
 		expectedResult bool
 	}{
@@ -167,6 +201,7 @@ func TestDeleteTaint(t *testing.T) {
 				},
 			},
 			taintToDelete: &v1.Taint{Key: "foo_1", Effect: v1.TaintEffectNoSchedule},
+			matchingFn:    (*v1.Taint).MatchTaint,
 			expectedTaints: []v1.Taint{
 				{
 					Key:    "foo",
@@ -184,6 +219,7 @@ func TestDeleteTaint(t *testing.T) {
 				},
 			},
 			taintToDelete: &v1.Taint{Key: "foo", Effect: v1.TaintEffectNoExecute},
+			matchingFn:    (*v1.Taint).MatchTaint,
 			expectedTaints: []v1.Taint{
 				{
 					Key:    "foo",
@@ -201,6 +237,7 @@ func TestDeleteTaint(t *testing.T) {
 				},
 			},
 			taintToDelete:  &v1.Taint{Key: "foo", Effect: v1.TaintEffectNoSchedule},
+			matchingFn:     (*v1.Taint).MatchTaint,
 			expectedTaints: []v1.Taint{},
 			expectedResult: true,
 		},
@@ -208,13 +245,14 @@ func TestDeleteTaint(t *testing.T) {
 			name:           "delete taint from empty taint array",
 			taints:         []v1.Taint{},
 			taintToDelete:  &v1.Taint{Key: "foo", Effect: v1.TaintEffectNoSchedule},
+			matchingFn:     (*v1.Taint).MatchTaint,
 			expectedTaints: []v1.Taint{},
 			expectedResult: false,
 		},
 	}
 
 	for _, c := range cases {
-		taints, result := deleteTaint(c.taints, c.taintToDelete)
+		taints, result := deleteMatchingTaint(c.taints, c.taintToDelete, c.matchingFn)
 		if result != c.expectedResult {
 			t.Errorf("[%s] should return %t, but got: %t", c.name, c.expectedResult, result)
 		}
@@ -223,3 +261,104 @@ func TestDeleteTaint(t *testing.T) {
 		}
 	}
 }
+
+func ready(_ context.Context, _ clientset.Interface, _, _ string) error {
+	return nil
+}
+
+func notReady(_ context.Context, _ clientset.Interface, _, _ string) error {
+	return errors.New("not ready")
+}
+
+func TestRemoveNotReadyTaint(t *testing.T) {
+	cases := []struct {
+		name           string
+		c              kubernetes.Interface
+		nodeName       string
+		componentName  string
+		readyFn        ComponentReadyFunc
+		expectedTaints []v1.Taint
+		shouldErr      bool
+	}{
+		{
+			name: "remove single taint",
+			c: fake.NewSimpleClientset(&v1.Node{
+				ObjectMeta: metav1.ObjectMeta{Name: "node1"},
+				Spec:       v1.NodeSpec{Taints: []v1.Taint{{Key: "driver/agent-not-ready", Effect: v1.TaintEffectNoExecute}}},
+			}),
+			nodeName:       "node1",
+			componentName:  "driver",
+			expectedTaints: nil,
+			readyFn:        ready,
+			shouldErr:      false,
+		},
+		{
+			name: "remove several taints",
+			c: fake.NewSimpleClientset(&v1.Node{
+				ObjectMeta: metav1.ObjectMeta{Name: "node1"},
+				Spec: v1.NodeSpec{Taints: []v1.Taint{
+					{Key: "driver/agent-not-ready", Effect: v1.TaintEffectNoExecute},
+					{Key: "driver/agent-not-ready", Effect: v1.TaintEffectNoSchedule},
+				}},
+			}),
+			nodeName:       "node1",
+			componentName:  "driver",
+			expectedTaints: nil,
+			readyFn:        ready,
+			shouldErr:      false,
+		},
+		{
+			name: "remove only matching taints",
+			c: fake.NewSimpleClientset(&v1.Node{
+				ObjectMeta: metav1.ObjectMeta{Name: "node1"},
+				Spec: v1.NodeSpec{Taints: []v1.Taint{
+					{Key: "driver/agent-not-ready", Effect: v1.TaintEffectNoExecute},
+					{Key: "foo", Effect: v1.TaintEffectNoSchedule},
+				}},
+			}),
+			nodeName:       "node1",
+			componentName:  "driver",
+			expectedTaints: []v1.Taint{{Key: "foo", Effect: v1.TaintEffectNoSchedule}},
+			readyFn:        ready,
+			shouldErr:      false,
+		},
+		{
+			name: "don't remove taint if not ready",
+			c: fake.NewSimpleClientset(&v1.Node{
+				ObjectMeta: metav1.ObjectMeta{Name: "node1"},
+				Spec:       v1.NodeSpec{Taints: []v1.Taint{{Key: "driver/agent-not-ready", Effect: v1.TaintEffectNoExecute}}},
+			}),
+			nodeName:       "node1",
+			componentName:  "driver",
+			expectedTaints: []v1.Taint{{Key: "driver/agent-not-ready", Effect: v1.TaintEffectNoExecute}},
+			readyFn:        notReady,
+			shouldErr:      true,
+		},
+		{
+			name: "noop if taint doesn't exist",
+			c: fake.NewSimpleClientset(&v1.Node{
+				ObjectMeta: metav1.ObjectMeta{Name: "node1"},
+				Spec:       v1.NodeSpec{},
+			}),
+			nodeName:       "node1",
+			componentName:  "driver",
+			expectedTaints: nil,
+			readyFn:        ready,
+			shouldErr:      false,
+		},
+	}
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			_, ctx := ktesting.NewTestContext(t)
+			err := RemoveNotReadyTaint(tt.c, tt.nodeName, tt.componentName, tt.readyFn)
+			if tt.shouldErr {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+			}
+			actualNode, err := tt.c.CoreV1().Nodes().Get(ctx, tt.nodeName, metav1.GetOptions{})
+			assert.NoError(t, err)
+			assert.Equal(t, tt.expectedTaints, actualNode.Spec.Taints)
+		})
+	}
+}