Skip to content

Commit

Permalink
feat: check for available node resources before building injector pod (
Browse files Browse the repository at this point in the history
…#2220)

## Description

Addresses #2144 by checking whether a node has enough resources
before the injector runs.

## Related Issue

Fixes #2144 

## Type of change

- [x] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Other (security config, docs update, etc)

## Checklist before merging

- [x] Test, docs, adr added or updated as needed
- [x] [Contributor Guide
Steps](https://github.com/defenseunicorns/zarf/blob/main/CONTRIBUTING.md#developer-workflow)
followed

---------

Co-authored-by: Wayne Starr <[email protected]>
  • Loading branch information
chrishorton and Racer159 authored Jan 10, 2024
1 parent 9514b23 commit 98a19b4
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
17 changes: 12 additions & 5 deletions src/pkg/cluster/injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ import (
// The chunk size for the tarball chunks.
var payloadChunkSize = 1024 * 768

// Resource quantities for the injector pod. They are declared once at package
// level so the same values feed both the injector container's resource spec
// and the minimum-node-resource arguments passed to GetAllImages.
var (
// CPU request for the injector container; also passed to GetAllImages as the minimum node CPU.
injectorRequestedCPU = resource.MustParse(".5")
// Memory request for the injector container; also passed to GetAllImages as the minimum node memory.
injectorRequestedMemory = resource.MustParse("64Mi")
// CPU limit for the injector container.
injectorLimitCPU = resource.MustParse("1")
// Memory limit for the injector container.
injectorLimitMemory = resource.MustParse("256Mi")
)

// StartInjectionMadness initializes a Zarf injection into the cluster.
func (c *Cluster) StartInjectionMadness(tmpDir string, imagesDir string, injectorSeedSrcs []string) {
spinner := message.NewProgressSpinner("Attempting to bootstrap the seed image into the cluster")
Expand All @@ -54,7 +61,7 @@ func (c *Cluster) StartInjectionMadness(tmpDir string, imagesDir string, injecto
// Get all the images from the cluster
timeout := 5 * time.Minute
spinner.Updatef("Getting the list of existing cluster images (%s timeout)", timeout.String())
if images, err = c.GetAllImages(timeout); err != nil {
if images, err = c.GetAllImages(timeout, injectorRequestedCPU, injectorRequestedMemory); err != nil {
spinner.Fatalf(err, "Unable to generate a list of candidate images to perform the registry injection")
}

Expand Down Expand Up @@ -362,12 +369,12 @@ func (c *Cluster) buildInjectionPod(node, image string, payloadConfigmaps []stri
// Keep resources as light as possible as we aren't actually running the container's other binaries
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse(".5"),
corev1.ResourceMemory: resource.MustParse("64Mi"),
corev1.ResourceCPU: injectorRequestedCPU,
corev1.ResourceMemory: injectorRequestedMemory,
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("256Mi"),
corev1.ResourceCPU: injectorLimitCPU,
corev1.ResourceMemory: injectorLimitMemory,
},
},
},
Expand Down
12 changes: 9 additions & 3 deletions src/pkg/k8s/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"sort"
"time"

"k8s.io/apimachinery/pkg/api/resource"
corev1 "k8s.io/api/core/v1"
)

Expand All @@ -19,7 +20,7 @@ type ImageMap map[string]bool
type ImageNodeMap map[string][]string

// GetAllImages returns a list of images and their nodes found in pods in the cluster.
func (k *K8s) GetAllImages(timeoutDuration time.Duration) (ImageNodeMap, error) {
func (k *K8s) GetAllImages(timeoutDuration time.Duration, minNodeCPU resource.Quantity, minNodeMemory resource.Quantity) (ImageNodeMap, error) {
timeout := time.After(timeoutDuration)

for {
Expand All @@ -34,7 +35,7 @@ func (k *K8s) GetAllImages(timeoutDuration time.Duration) (ImageNodeMap, error)
// After delay, try running.
default:
// If no images or an error, log and loop.
if images, err := k.GetImagesWithNodes(corev1.NamespaceAll); len(images) < 1 || err != nil {
if images, err := k.GetImagesWithNodes(corev1.NamespaceAll, minNodeCPU, minNodeMemory); len(images) < 1 || err != nil {
k.Log("no images found: %w", err)
} else {
// Otherwise, return the image list.
Expand All @@ -46,7 +47,7 @@ func (k *K8s) GetAllImages(timeoutDuration time.Duration) (ImageNodeMap, error)

// GetImagesWithNodes checks for images on schedulable nodes and returns
// a map of these images and their nodes in a given namespace.
func (k *K8s) GetImagesWithNodes(namespace string) (ImageNodeMap, error) {
func (k *K8s) GetImagesWithNodes(namespace string, minNodeCPU resource.Quantity, minNodeMemory resource.Quantity) (ImageNodeMap, error) {
result := make(ImageNodeMap)

pods, err := k.GetPods(namespace)
Expand All @@ -68,6 +69,11 @@ findImages:
return nil, fmt.Errorf("unable to get the node %s", pod.Spec.NodeName)
}

if nodeDetails.Status.Allocatable.Cpu().Cmp(minNodeCPU) < 0 ||
nodeDetails.Status.Allocatable.Memory().Cmp(minNodeMemory) < 0 {
continue findImages
}

for _, taint := range nodeDetails.Spec.Taints {
if taint.Effect == corev1.TaintEffectNoSchedule || taint.Effect == corev1.TaintEffectNoExecute {
continue findImages
Expand Down

0 comments on commit 98a19b4

Please sign in to comment.