Skip to content

Commit

Permalink
Allow delaying machine config pool upgrades (node reboots) (#64)
Browse files Browse the repository at this point in the history
  • Loading branch information
bastjan authored Apr 2, 2024
1 parent cae7dbc commit fa968ce
Show file tree
Hide file tree
Showing 12 changed files with 777 additions and 27 deletions.
45 changes: 44 additions & 1 deletion api/v1beta1/upgradejob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,19 @@ const (
UpgradeJobConditionUpgradeCompleted = "UpgradeCompleted"
// UpgradeJobConditionPostHealthCheckDone is the condition type for a post health check done upgrade job
UpgradeJobConditionPostHealthCheckDone = "PostHealthCheckDone"
// UpgradeJobConditionPaused is the condition type for a paused upgrade job.
// An upgrade job can be paused if `.spec.machineConfigPools` matches a pool and `delayUpgrade` is set.
UpgradeJobConditionPaused = "Paused"
// UpgradeJobConditionMachineConfigPoolsPaused is true if the controller paused any machine config pools.
// Does not correlate with any upgrade specific condition.
UpgradeJobConditionMachineConfigPoolsPaused = "MachineConfigPoolsPaused"

// UpgradeJobReasonFailed is the generic reason for a failed upgrade job
UpgradeJobReasonFailed = "Failed"
// UpgradeJobReasonExpired is used when the upgrade job is not started before the startBefore time
UpgradeJobReasonExpired = "Expired"
// UpgradeJobReasonUnpausingPoolsExpired is used when the upgrade job was not able to unpause the machine config pools before the delayMax time
UpgradeJobReasonUnpausingPoolsExpired = "UnpausingPoolsExpired"
// UpgradeJobReasonTimedOut is used when the upgrade job is not completed before the upgradeTimeout time
UpgradeJobReasonTimedOut = "TimedOut"
// UpgradeJobReasonPreHealthCheckFailed is used when the health check failed
Expand All @@ -41,6 +49,10 @@ const (
UpgradeJobReasonCompleted = "Completed"
// UpgradeJobReasonInProgress is used when the pre health check was done
UpgradeJobReasonInProgress = "InProgress"
// UpgradeJobReasonNoManagedPools is used when no machine config pools are managed by the upgrade job
UpgradeJobReasonNoManagedPools = "NoManagedPools"
// UpgradeJobReasonDelaySet is used if the upgrade job paused machine config pools due to delayUpgrade
UpgradeJobReasonDelaySet = "DelaySet"
)

// UpgradeJobSpec defines the desired state of UpgradeJob
Expand All @@ -62,7 +74,8 @@ type UpgradeJobSpec struct {

// UpgradeJobConfig defines the configuration for the upgrade job
type UpgradeJobConfig struct {
// UpgradeTimeout defines the timeout after which the upgrade is considered failed
// UpgradeTimeout defines the timeout after which the upgrade is considered failed.
// Relative to the `.spec.startAfter` timestamp of the upgrade job.
// +kubebuilder:validation:Type=string
// +kubebuilder:validation:Format=duration
// +kubebuilder:default:="12h"
Expand All @@ -72,6 +85,36 @@ type UpgradeJobConfig struct {
PreUpgradeHealthChecks UpgradeJobHealthCheck `json:"preUpgradeHealthChecks"`
// PostUpgradeHealthChecks defines the health checks to be performed after the upgrade
PostUpgradeHealthChecks UpgradeJobHealthCheck `json:"postUpgradeHealthChecks"`

// MachineConfigPools defines the machine config pool specific configuration for the upgrade job
// +optional
MachineConfigPools []UpgradeJobMachineConfigPoolSpec `json:"machineConfigPools,omitempty"`
}

// UpgradeJobMachineConfigPoolSpec allows configuring the upgrade of a machine config pool
type UpgradeJobMachineConfigPoolSpec struct {
// Review notes (deliberately kept apart from the field doc comments below):
//   - MatchLabels is a pointer, which is what lets "nil" (field absent) and
//     "empty" (`{}`) be told apart; the two cases are documented with opposite
//     meanings (match nothing vs. match everything).
//   - NOTE(review): DelayUpgrade is a struct value, and encoding/json's
//     `omitempty` never omits struct values, so `delayUpgrade` is presumably
//     always serialized, even when unset. Confirm this is intended.

// MatchLabels defines the labels to match the machine config pool.
// If empty, all machine config pools are matched.
// If nil, no machine config pools are matched.
// +optional
MatchLabels *metav1.LabelSelector `json:"matchLabels,omitempty"`

// DelayUpgrade defines whether to delay the upgrade of the machine config pool
// +optional
DelayUpgrade UpgradeJobMachineConfigPoolDelayUpgradeSpec `json:"delayUpgrade,omitempty"`
}

// UpgradeJobMachineConfigPoolDelayUpgradeSpec defines the delay for the upgrade of a machine config pool
type UpgradeJobMachineConfigPoolDelayUpgradeSpec struct {
// Review notes (deliberately kept apart from the field doc comments below):
//   - The two delays are documented against different reference points:
//     DelayMin against `.spec.startAfter`, DelayMax against `.spec.startBefore`.
//     NOTE(review): confirm this asymmetry matches what the controller computes.
//   - NOTE(review): unlike UpgradeTimeout, neither field carries the
//     `+kubebuilder:validation:Type=string` / `Format=duration` markers, so the
//     generated schema presumably accepts any string here. Confirm whether
//     duration validation should be added.
//   - NOTE(review): assuming metav1.Duration is a struct wrapper, `omitempty`
//     has no effect and zero values are still serialized. TODO confirm.

// DelayMin defines the delay after which the upgrade of the machine config pool should start.
// Relative to the `.spec.startAfter` timestamp of the upgrade job.
// +optional
DelayMin metav1.Duration `json:"delayMin,omitempty"`
// DelayMax defines the maximum delay after which the upgrade of the machine config pool should start.
// Relative to the `.spec.startBefore` timestamp of the upgrade job.
// If the upgrade of the machine config pool can't be started before this time, it is considered failed.
// +optional
DelayMax metav1.Duration `json:"delayMax,omitempty"`
}

// UpgradeJobHealthCheck defines the health checks to be performed
Expand Down
3 changes: 2 additions & 1 deletion api/v1beta1/upgradejobhook_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ const (
EventCreate UpgradeEvent = "Create"
// EventStart is the event type for when a job is started.
EventStart UpgradeEvent = "Start"
// UpgradeCompleted is the event type for when the upgrade is completed and health checks have passed.
// EventUpgradeComplete is the event type for when the upgrade is completed and health checks have passed,
// but before any paused MachineConfigPools are done upgrading.
EventUpgradeComplete UpgradeEvent = "UpgradeComplete"

// EventFinish is the event type for when a job is finished regardless of outcome.
Expand Down
51 changes: 48 additions & 3 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 76 additions & 2 deletions config/crd/bases/managedupgrade.appuio.io_upgradeconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,79 @@ spec:
description: UpgradeJobConfig defines the configuration for
the upgrade job
properties:
machineConfigPools:
description: MachineConfigPools defines the machine config
pool specific configuration for the upgrade job
items:
description: UpgradeJobMachineConfigPoolSpec allows
configuring the upgrade of a machine config pool
properties:
delayUpgrade:
description: DelayUpgrade defines whether to delay
the upgrade of the machine config pool
properties:
delayMax:
description: |-
DelayMax defines the maximum delay after which the upgrade of the machine config pool should start.
Relative to the `.spec.startBefore` timestamp of the upgrade job.
If the upgrade of the machine config pool can't be started before this time, it is considered failed.
type: string
delayMin:
description: |-
DelayMin defines the delay after which the upgrade of the machine config pool should start.
Relative to the `.spec.startAfter` timestamp of the upgrade job.
type: string
type: object
matchLabels:
description: |-
MatchLabels defines the labels to match the machine config pool.
If empty, all machine config pools are matched.
If nil, no machine config pools are matched.
properties:
matchExpressions:
description: matchExpressions is a list of label
selector requirements. The requirements are
ANDed.
items:
description: |-
A label selector requirement is a selector that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: key is the label key that
the selector applies to.
type: string
operator:
description: |-
operator represents a key's relationship to a set of values.
Valid operators are In, NotIn, Exists and DoesNotExist.
type: string
values:
description: |-
values is an array of string values. If the operator is In or NotIn,
the values array must be non-empty. If the operator is Exists or DoesNotExist,
the values array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: |-
matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
map is equivalent to an element of matchExpressions, whose key field is "key", the
operator is "In", and the values array contains only "value". The requirements are ANDed.
type: object
type: object
x-kubernetes-map-type: atomic
type: object
type: array
postUpgradeHealthChecks:
description: PostUpgradeHealthChecks defines the health
checks to be performed after the upgrade
Expand Down Expand Up @@ -108,8 +181,9 @@ spec:
type: object
upgradeTimeout:
default: 12h
description: UpgradeTimeout defines the timeout after
which the upgrade is considered failed
description: |-
UpgradeTimeout defines the timeout after which the upgrade is considered failed.
Relative to the `.spec.startAfter` timestamp of the upgrade job.
format: duration
type: string
required:
Expand Down
77 changes: 75 additions & 2 deletions config/crd/bases/managedupgrade.appuio.io_upgradejobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,78 @@ spec:
description: UpgradeJobConfig defines the configuration for the upgrade
job
properties:
machineConfigPools:
description: MachineConfigPools defines the machine config pool
specific configuration for the upgrade job
items:
description: UpgradeJobMachineConfigPoolSpec allows configuring
the upgrade of a machine config pool
properties:
delayUpgrade:
description: DelayUpgrade defines whether to delay the upgrade
of the machine config pool
properties:
delayMax:
description: |-
DelayMax defines the maximum delay after which the upgrade of the machine config pool should start.
Relative to the `.spec.startBefore` timestamp of the upgrade job.
If the upgrade of the machine config pool can't be started before this time, it is considered failed.
type: string
delayMin:
description: |-
DelayMin defines the delay after which the upgrade of the machine config pool should start.
Relative to the `.spec.startAfter` timestamp of the upgrade job.
type: string
type: object
matchLabels:
description: |-
MatchLabels defines the labels to match the machine config pool.
If empty, all machine config pools are matched.
If nil, no machine config pools are matched.
properties:
matchExpressions:
description: matchExpressions is a list of label selector
requirements. The requirements are ANDed.
items:
description: |-
A label selector requirement is a selector that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: key is the label key that the selector
applies to.
type: string
operator:
description: |-
operator represents a key's relationship to a set of values.
Valid operators are In, NotIn, Exists and DoesNotExist.
type: string
values:
description: |-
values is an array of string values. If the operator is In or NotIn,
the values array must be non-empty. If the operator is Exists or DoesNotExist,
the values array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: |-
matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
map is equivalent to an element of matchExpressions, whose key field is "key", the
operator is "In", and the values array contains only "value". The requirements are ANDed.
type: object
type: object
x-kubernetes-map-type: atomic
type: object
type: array
postUpgradeHealthChecks:
description: PostUpgradeHealthChecks defines the health checks
to be performed after the upgrade
Expand Down Expand Up @@ -81,8 +153,9 @@ spec:
type: object
upgradeTimeout:
default: 12h
description: UpgradeTimeout defines the timeout after which the
upgrade is considered failed
description: |-
UpgradeTimeout defines the timeout after which the upgrade is considered failed.
Relative to the `.spec.startAfter` timestamp of the upgrade job.
format: duration
type: string
required:
Expand Down
Loading

0 comments on commit fa968ce

Please sign in to comment.