diff --git a/charts/karpenter_nodes/Chart.yaml b/charts/karpenter_nodes/Chart.yaml index ee2a6be..b405224 100644 --- a/charts/karpenter_nodes/Chart.yaml +++ b/charts/karpenter_nodes/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: karpenter_nodes -version: 0.1.2 +version: 0.1.3 description: A Helm chart for generating NodeClasses and NodePools for Karpenter maintainers: - name: nadavbuc diff --git a/charts/karpenter_nodes/README.md b/charts/karpenter_nodes/README.md index 1717e84..f8317ac 100644 --- a/charts/karpenter_nodes/README.md +++ b/charts/karpenter_nodes/README.md @@ -72,6 +72,7 @@ Note - Most of the values can be overridden per nodegroup (If not specified, it | `excludeInstanceSize` | Exclude specific instance sizes | `List` | ✓ | ✓ | | `headRoom` | Generate Ultra Low Priority Class for Headroom (see below) | `String` | ✓ | x | | `additionalRequirements` | add NodePool requirements which are not covered by this chart | `List(map)` | ✓ | ✓ | +| `autoTaint` | add NodePool taint with `dedicated` as key and nodegroup name as value (`-` replaced with `_`) | `Boolean` | ✓ | ✓ | ### NodeGroup Configuration | Key Name | Description | Type | Optional? | Optional Per NodeGroup? | diff --git a/charts/karpenter_nodes/examples/argocd_example.yaml b/charts/karpenter_nodes/examples/argocd_example.yaml new file mode 100644 index 0000000..25b55c9 --- /dev/null +++ b/charts/karpenter_nodes/examples/argocd_example.yaml @@ -0,0 +1,22 @@ +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: karpenter-nodes + namespace: argocd +spec: + project: infra + sources: + - repoURL: 'https://opensource.fiverr.com/public_charts/' + chart: karpenter_nodes + targetRevision: 1.0.0 + helm: + valueFiles: + - $values/karpenter_nodes/eks-dev/common.yaml + - $values/karpenter_nodes/eks-dev/nodegroups.yaml + - $values/karpenter_nodes/eks-dev/userdata.yaml + - repoURL: https://github.com/my_values_repo.git + targetRevision: HEAD + ref: values + destination: + server: https://kubernetes diff --git a/charts/karpenter_nodes/examples/common.yaml b/charts/karpenter_nodes/examples/common.yaml new file mode 100644 index 0000000..ea56785 --- /dev/null +++ b/charts/karpenter_nodes/examples/common.yaml @@ -0,0 +1,73 @@ +clusterName: "eks-dev" + +subnetSelectorTerms: + - tags: + Name: eks-dev-eu-west-1a + - tags: + Name: eks-dev-eu-west-1b + - tags: + Name: eks-dev-eu-west-1c + +securityGroupSelectorTerms: + - tags: + Name: eks-nodes + - tags: + Name: eks-dev + +availabilityZones: + - eu-west-1a + - eu-west-1b + - eu-west-1c + +excludeInstanceSize: + - metal + +blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + volumeSize: 100Gi + volumeType: gp3 + encrypted: true + deleteOnTermination: true + iops: 3000 + throughput: 125 + +instances: + minGeneration: 4 + architecture: "amd64" + categories: + - m + - r + - c + cores: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + capacityType: + - spot + + +nodeTags: + team: devops + component: eks-karpenter-nodes + created_by: helm + +amiFamily: AL2 + +excludeFamilies: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad diff --git a/charts/karpenter_nodes/examples/generate.sh b/charts/karpenter_nodes/examples/generate.sh new file mode 100755 index 0000000..f9758ed --- /dev/null +++ b/charts/karpenter_nodes/examples/generate.sh @@ -0,0 +1,2 @@ +#!/bin/bash +helm template karpenter-nodes ../ -f common.yaml -f nodegroups.yaml -f userdata.yaml > output/output.yaml diff --git 
a/charts/karpenter_nodes/examples/nodegroups.yaml b/charts/karpenter_nodes/examples/nodegroups.yaml new file mode 100644 index 0000000..270b867 --- /dev/null +++ b/charts/karpenter_nodes/examples/nodegroups.yaml @@ -0,0 +1,121 @@ +nodeGroups: + nodes-default: + weight: 2 + instances: + categories: + - m + - r + capacitySpread: + start: 1 + end: 5 + nodes-default-od: + nodeGroupLabel: nodes-default + capacitySpread: + start: 6 + end: 6 + instances: + minGeneration: 5 + categories: + - m + - r + capacityType: + - on-demand + nodeClassRef: + name: nodes-default-amd64 + nodes-workers: + weight: 2 + instances: + categories: + - m + - r + capacitySpread: + start: 1 + end: 5 + autoTaint: true + nodes-workers-c: + nodeGroupLabel: nodes-workers + capacitySpread: + start: 1 + end: 5 + instances: + categories: + - c + autoTaint: true + nodeClassRef: + name: nodes-workers-amd64 + nodes-canary: + instances: {} + capacitySpread: + start: 1 + end: 5 + taints: + - key: "dedicated" + value: "canary" + effect: "NoSchedule" + nodes-jobs: + expireAfter: "Never" + instances: + capacityType: + - on-demand + consolidationPolicy: "WhenEmpty" + taints: + - key: "dedicated" + value: "jobs" + effect: "NoSchedule" + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + iops: 9000 + throughput: 125 + volumeSize: 500Gi + volumeType: gp3 + nodes-ingress: + registryCache: "false" + expireAfter: "Never" + instances: + architecture: "multiarch" + capacityType: + - on-demand + taints: + - key: "dedicated" + value: "ingress" + effect: "NoSchedule" + nodes-monitoring: + labels: + prometheus-scrape: "true" #Not a real use-case + additionalNodeTags: + innercomponent: monitoring + expireAfter: "Never" + instances: + architecture: "multiarch" + capacityType: + - on-demand + taints: + - key: "dedicated" + value: "monitoring" + effect: "NoSchedule" + excludeFamilies: + - x1 #We don't have x1 instances anyway, but this override keeps the globally excluded AMD families available for this nodegroup + nodes-gpu: + labels: + gpu: "true" + instances: + instanceTypes: + - g5.xlarge + - g5.2xlarge + - g5.4xlarge + categories: + - g + limits: + cpu: "128" + taints: + - key: "dedicated" + value: "gpu" + effect: "NoSchedule" + + + + + diff --git a/charts/karpenter_nodes/examples/output/output.yaml b/charts/karpenter_nodes/examples/output/output.yaml new file mode 100644 index 0000000..34f9c06 --- /dev/null +++ b/charts/karpenter_nodes/examples/output/output.yaml @@ -0,0 +1,1740 @@ +--- +# Source: karpenter_nodes/templates/priorityclass.yaml +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: karpenter-headroom +value: -1000000 +globalDefault: false +description: "Used for dummy pods to generate headroom in karpenter" +--- +# Source: karpenter_nodes/templates/nodeclass.yaml +apiVersion: karpenter.k8s.aws/v1beta1 +kind: EC2NodeClass +metadata: + name: "nodes-canary-amd64" +spec: + role: eks_nodes_role + amiFamily: AL2 + amiSelectorTerms: + subnetSelectorTerms: + - tags: + Name: eks-dev-eu-west-1a + - tags: + Name: eks-dev-eu-west-1b + - tags: + Name: eks-dev-eu-west-1c + securityGroupSelectorTerms: + - tags: + Name: eks-nodes + - tags: + Name: eks-dev + tags: + cluster: eks-dev + nodegroup: nodes-canary + component: eks-karpenter-nodes + created_by: helm + team: devops + managed_by: karpenter + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + iops: 3000 + throughput: 125 + volumeSize: 100Gi + volumeType: gp3 + detailedMonitoring: false + 
associatePublicIPAddress: false + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + userData: | + CLUSTER_NAME=eks-dev + INSTANCEGROUP=nodes-canary + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` + ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-5,6)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + ## Disable IPv6 + cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + EOF + ## Stolen from this guy: https://blog.codeship.com/running-1000-containers-in-docker-swarm/ + cat <<EOF > /etc/sysctl.d/99-kube-net.conf + # Have a larger connection range available + net.ipv4.ip_local_port_range=1024 65000 + # Reuse closed sockets faster + net.ipv4.tcp_tw_reuse=1 + net.ipv4.tcp_fin_timeout=15 + # The maximum number of "backlogged sockets". Default is 128. + net.core.somaxconn=4096 + net.core.netdev_max_backlog=4096 + # 16MB per socket - which sounds like a lot, + # but will virtually never consume that much. + net.core.rmem_max=16777216 + net.core.wmem_max=16777216 + # Various network tunables + net.ipv4.tcp_max_syn_backlog=20480 + net.ipv4.tcp_max_tw_buckets=400000 + net.ipv4.tcp_no_metrics_save=1 + net.ipv4.tcp_rmem=4096 87380 16777216 + net.ipv4.tcp_syn_retries=2 + net.ipv4.tcp_synack_retries=2 + net.ipv4.tcp_wmem=4096 65536 16777216 + #vm.min_free_kbytes=65536 + # Connection tracking to prevent dropped connections (usually issue on LBs) + net.netfilter.nf_conntrack_max=262144 + net.ipv4.netfilter.ip_conntrack_generic_timeout=120 + net.netfilter.nf_conntrack_tcp_timeout_established=86400 + # ARP cache settings for a highly loaded docker swarm + net.ipv4.neigh.default.gc_thresh1=8096 + net.ipv4.neigh.default.gc_thresh2=12288 + net.ipv4.neigh.default.gc_thresh3=16384 + EOF + systemctl restart systemd-sysctl.service + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + mkdir -p /etc/containerd/certs.d/docker.io + cat <<EOF >/etc/containerd/certs.d/docker.io/hosts.toml + server = "https://registry-1.docker.io" + [host."http://registry"] + capabilities = ["pull", "resolve"] + skip_verify = true + EOF +--- +# Source: karpenter_nodes/templates/nodeclass.yaml +apiVersion: karpenter.k8s.aws/v1beta1 +kind: EC2NodeClass +metadata: + name: "nodes-default-amd64" +spec: + role: eks_nodes_role + amiFamily: AL2 + amiSelectorTerms: + subnetSelectorTerms: + - tags: + Name: eks-dev-eu-west-1a + - tags: + Name: eks-dev-eu-west-1b + - tags: + Name: eks-dev-eu-west-1c + securityGroupSelectorTerms: + - tags: + Name: eks-nodes + - tags: + Name: eks-dev + tags: + cluster: eks-dev + nodegroup: nodes-default + component: eks-karpenter-nodes + created_by: helm + team: devops + managed_by: karpenter + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + iops: 3000 + throughput: 125 + volumeSize: 100Gi + volumeType: gp3 + detailedMonitoring: false + 
associatePublicIPAddress: false + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + userData: | + CLUSTER_NAME=eks-dev + INSTANCEGROUP=nodes-default + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` + ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-5,6)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + ## Disable IPv6 + cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + EOF + ## Stolen from this guy: https://blog.codeship.com/running-1000-containers-in-docker-swarm/ + cat <<EOF > /etc/sysctl.d/99-kube-net.conf + # Have a larger connection range available + net.ipv4.ip_local_port_range=1024 65000 + # Reuse closed sockets faster + net.ipv4.tcp_tw_reuse=1 + net.ipv4.tcp_fin_timeout=15 + # The maximum number of "backlogged sockets". Default is 128. + net.core.somaxconn=4096 + net.core.netdev_max_backlog=4096 + # 16MB per socket - which sounds like a lot, + # but will virtually never consume that much. + net.core.rmem_max=16777216 + net.core.wmem_max=16777216 + # Various network tunables + net.ipv4.tcp_max_syn_backlog=20480 + net.ipv4.tcp_max_tw_buckets=400000 + net.ipv4.tcp_no_metrics_save=1 + net.ipv4.tcp_rmem=4096 87380 16777216 + net.ipv4.tcp_syn_retries=2 + net.ipv4.tcp_synack_retries=2 + net.ipv4.tcp_wmem=4096 65536 16777216 + #vm.min_free_kbytes=65536 + # Connection tracking to prevent dropped connections (usually issue on LBs) + net.netfilter.nf_conntrack_max=262144 + net.ipv4.netfilter.ip_conntrack_generic_timeout=120 + net.netfilter.nf_conntrack_tcp_timeout_established=86400 + # ARP cache settings for a highly loaded docker swarm + net.ipv4.neigh.default.gc_thresh1=8096 + net.ipv4.neigh.default.gc_thresh2=12288 + net.ipv4.neigh.default.gc_thresh3=16384 + EOF + systemctl restart systemd-sysctl.service + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + mkdir -p /etc/containerd/certs.d/docker.io + cat <<EOF >/etc/containerd/certs.d/docker.io/hosts.toml + server = "https://registry-1.docker.io" + [host."http://registry"] + capabilities = ["pull", "resolve"] + skip_verify = true + EOF +--- +# Source: karpenter_nodes/templates/nodeclass.yaml +apiVersion: karpenter.k8s.aws/v1beta1 +kind: EC2NodeClass +metadata: + name: "nodes-gpu-amd64" +spec: + role: eks_nodes_role + amiFamily: AL2 + amiSelectorTerms: + subnetSelectorTerms: + - tags: + Name: eks-dev-eu-west-1a + - tags: + Name: eks-dev-eu-west-1b + - tags: + Name: eks-dev-eu-west-1c + securityGroupSelectorTerms: + - tags: + Name: eks-nodes + - tags: + Name: eks-dev + tags: + cluster: eks-dev + nodegroup: nodes-gpu + component: eks-karpenter-nodes + created_by: helm + team: devops + managed_by: karpenter + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + iops: 3000 + throughput: 125 + volumeSize: 100Gi + volumeType: gp3 + detailedMonitoring: false + associatePublicIPAddress: false 
+ metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + userData: | + CLUSTER_NAME=eks-dev + INSTANCEGROUP=nodes-gpu + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` + ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-5,6)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + ## Disable IPv6 + cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + EOF + ## Stolen from this guy: https://blog.codeship.com/running-1000-containers-in-docker-swarm/ + cat <<EOF > /etc/sysctl.d/99-kube-net.conf + # Have a larger connection range available + net.ipv4.ip_local_port_range=1024 65000 + # Reuse closed sockets faster + net.ipv4.tcp_tw_reuse=1 + net.ipv4.tcp_fin_timeout=15 + # The maximum number of "backlogged sockets". Default is 128. + net.core.somaxconn=4096 + net.core.netdev_max_backlog=4096 + # 16MB per socket - which sounds like a lot, + # but will virtually never consume that much. + net.core.rmem_max=16777216 + net.core.wmem_max=16777216 + # Various network tunables + net.ipv4.tcp_max_syn_backlog=20480 + net.ipv4.tcp_max_tw_buckets=400000 + net.ipv4.tcp_no_metrics_save=1 + net.ipv4.tcp_rmem=4096 87380 16777216 + net.ipv4.tcp_syn_retries=2 + net.ipv4.tcp_synack_retries=2 + net.ipv4.tcp_wmem=4096 65536 16777216 + #vm.min_free_kbytes=65536 + # Connection tracking to prevent dropped connections (usually issue on LBs) + net.netfilter.nf_conntrack_max=262144 + net.ipv4.netfilter.ip_conntrack_generic_timeout=120 + net.netfilter.nf_conntrack_tcp_timeout_established=86400 + # ARP cache settings for a highly loaded docker swarm + net.ipv4.neigh.default.gc_thresh1=8096 + net.ipv4.neigh.default.gc_thresh2=12288 + net.ipv4.neigh.default.gc_thresh3=16384 + EOF + systemctl restart systemd-sysctl.service + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + mkdir -p /etc/containerd/certs.d/docker.io + cat <<EOF >/etc/containerd/certs.d/docker.io/hosts.toml + server = "https://registry-1.docker.io" + [host."http://registry"] + capabilities = ["pull", "resolve"] + skip_verify = true + EOF +--- +# Source: karpenter_nodes/templates/nodeclass.yaml +apiVersion: karpenter.k8s.aws/v1beta1 +kind: EC2NodeClass +metadata: + name: "nodes-ingress-multiarch" +spec: + role: eks_nodes_role + amiFamily: AL2 + amiSelectorTerms: + subnetSelectorTerms: + - tags: + Name: eks-dev-eu-west-1a + - tags: + Name: eks-dev-eu-west-1b + - tags: + Name: eks-dev-eu-west-1c + securityGroupSelectorTerms: + - tags: + Name: eks-nodes + - tags: + Name: eks-dev + tags: + cluster: eks-dev + nodegroup: nodes-ingress + component: eks-karpenter-nodes + created_by: helm + team: devops + managed_by: karpenter + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + iops: 3000 + throughput: 125 + volumeSize: 100Gi + volumeType: gp3 + detailedMonitoring: false + associatePublicIPAddress: false + metadataOptions: + 
httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + userData: | + CLUSTER_NAME=eks-dev + INSTANCEGROUP=nodes-ingress + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` + ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-5,6)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + ## Disable IPv6 + cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + EOF + ## Stolen from this guy: https://blog.codeship.com/running-1000-containers-in-docker-swarm/ + cat <<EOF > /etc/sysctl.d/99-kube-net.conf + # Have a larger connection range available + net.ipv4.ip_local_port_range=1024 65000 + # Reuse closed sockets faster + net.ipv4.tcp_tw_reuse=1 + net.ipv4.tcp_fin_timeout=15 + # The maximum number of "backlogged sockets". Default is 128. + net.core.somaxconn=4096 + net.core.netdev_max_backlog=4096 + # 16MB per socket - which sounds like a lot, + # but will virtually never consume that much. + net.core.rmem_max=16777216 + net.core.wmem_max=16777216 + # Various network tunables + net.ipv4.tcp_max_syn_backlog=20480 + net.ipv4.tcp_max_tw_buckets=400000 + net.ipv4.tcp_no_metrics_save=1 + net.ipv4.tcp_rmem=4096 87380 16777216 + net.ipv4.tcp_syn_retries=2 + net.ipv4.tcp_synack_retries=2 + net.ipv4.tcp_wmem=4096 65536 16777216 + #vm.min_free_kbytes=65536 + # Connection tracking to prevent dropped connections (usually issue on LBs) + net.netfilter.nf_conntrack_max=262144 + net.ipv4.netfilter.ip_conntrack_generic_timeout=120 + net.netfilter.nf_conntrack_tcp_timeout_established=86400 + # ARP cache settings for a highly loaded docker swarm + net.ipv4.neigh.default.gc_thresh1=8096 + net.ipv4.neigh.default.gc_thresh2=12288 + net.ipv4.neigh.default.gc_thresh3=16384 + EOF + systemctl restart systemd-sysctl.service + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json +--- +# Source: karpenter_nodes/templates/nodeclass.yaml +apiVersion: karpenter.k8s.aws/v1beta1 +kind: EC2NodeClass +metadata: + name: "nodes-jobs-amd64" +spec: + role: eks_nodes_role + amiFamily: AL2 + amiSelectorTerms: + subnetSelectorTerms: + - tags: + Name: eks-dev-eu-west-1a + - tags: + Name: eks-dev-eu-west-1b + - tags: + Name: eks-dev-eu-west-1c + securityGroupSelectorTerms: + - tags: + Name: eks-nodes + - tags: + Name: eks-dev + tags: + cluster: eks-dev + nodegroup: nodes-jobs + component: eks-karpenter-nodes + created_by: helm + team: devops + managed_by: karpenter + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + iops: 9000 + throughput: 125 + volumeSize: 500Gi + volumeType: gp3 + detailedMonitoring: false + associatePublicIPAddress: false + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + userData: | + CLUSTER_NAME=eks-dev + INSTANCEGROUP=nodes-jobs + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` + 
ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-5,6)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + ## Disable IPv6 + cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + EOF + ## Stolen from this guy: https://blog.codeship.com/running-1000-containers-in-docker-swarm/ + cat <<EOF > /etc/sysctl.d/99-kube-net.conf + # Have a larger connection range available + net.ipv4.ip_local_port_range=1024 65000 + # Reuse closed sockets faster + net.ipv4.tcp_tw_reuse=1 + net.ipv4.tcp_fin_timeout=15 + # The maximum number of "backlogged sockets". Default is 128. + net.core.somaxconn=4096 + net.core.netdev_max_backlog=4096 + # 16MB per socket - which sounds like a lot, + # but will virtually never consume that much. + net.core.rmem_max=16777216 + net.core.wmem_max=16777216 + # Various network tunables + net.ipv4.tcp_max_syn_backlog=20480 + net.ipv4.tcp_max_tw_buckets=400000 + net.ipv4.tcp_no_metrics_save=1 + net.ipv4.tcp_rmem=4096 87380 16777216 + net.ipv4.tcp_syn_retries=2 + net.ipv4.tcp_synack_retries=2 + net.ipv4.tcp_wmem=4096 65536 16777216 + #vm.min_free_kbytes=65536 + # Connection tracking to prevent dropped connections (usually issue on LBs) + net.netfilter.nf_conntrack_max=262144 + net.ipv4.netfilter.ip_conntrack_generic_timeout=120 + net.netfilter.nf_conntrack_tcp_timeout_established=86400 + # ARP cache settings for a highly loaded docker swarm + net.ipv4.neigh.default.gc_thresh1=8096 + net.ipv4.neigh.default.gc_thresh2=12288 + net.ipv4.neigh.default.gc_thresh3=16384 + EOF + systemctl restart systemd-sysctl.service + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + mkdir -p /etc/containerd/certs.d/docker.io + cat <<EOF >/etc/containerd/certs.d/docker.io/hosts.toml + server = "https://registry-1.docker.io" + [host."http://registry"] + capabilities = ["pull", "resolve"] + skip_verify = true + EOF +--- +# Source: karpenter_nodes/templates/nodeclass.yaml +apiVersion: karpenter.k8s.aws/v1beta1 +kind: EC2NodeClass +metadata: + name: "nodes-monitoring-multiarch" +spec: + role: eks_nodes_role + amiFamily: AL2 + amiSelectorTerms: + subnetSelectorTerms: + - tags: + Name: eks-dev-eu-west-1a + - tags: + Name: eks-dev-eu-west-1b + - tags: + Name: eks-dev-eu-west-1c + securityGroupSelectorTerms: + - tags: + Name: eks-nodes + - tags: + Name: eks-dev + tags: + cluster: eks-dev + nodegroup: nodes-monitoring + component: eks-karpenter-nodes + created_by: helm + team: devops + innercomponent: monitoring + managed_by: karpenter + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + iops: 3000 + throughput: 125 + volumeSize: 100Gi + volumeType: gp3 + detailedMonitoring: false + associatePublicIPAddress: false + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + userData: | + CLUSTER_NAME=eks-dev + INSTANCEGROUP=nodes-monitoring + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | 
awk '{print $2}'` + ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-5,6)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + ## Disable IPv6 + cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + EOF + ## Stolen from this guy: https://blog.codeship.com/running-1000-containers-in-docker-swarm/ + cat <<EOF > /etc/sysctl.d/99-kube-net.conf + # Have a larger connection range available + net.ipv4.ip_local_port_range=1024 65000 + # Reuse closed sockets faster + net.ipv4.tcp_tw_reuse=1 + net.ipv4.tcp_fin_timeout=15 + # The maximum number of "backlogged sockets". Default is 128. + net.core.somaxconn=4096 + net.core.netdev_max_backlog=4096 + # 16MB per socket - which sounds like a lot, + # but will virtually never consume that much. + net.core.rmem_max=16777216 + net.core.wmem_max=16777216 + # Various network tunables + net.ipv4.tcp_max_syn_backlog=20480 + net.ipv4.tcp_max_tw_buckets=400000 + net.ipv4.tcp_no_metrics_save=1 + net.ipv4.tcp_rmem=4096 87380 16777216 + net.ipv4.tcp_syn_retries=2 + net.ipv4.tcp_synack_retries=2 + net.ipv4.tcp_wmem=4096 65536 16777216 + #vm.min_free_kbytes=65536 + # Connection tracking to prevent dropped connections (usually issue on LBs) + net.netfilter.nf_conntrack_max=262144 + net.ipv4.netfilter.ip_conntrack_generic_timeout=120 + net.netfilter.nf_conntrack_tcp_timeout_established=86400 + # ARP cache settings for a highly loaded docker swarm + net.ipv4.neigh.default.gc_thresh1=8096 + net.ipv4.neigh.default.gc_thresh2=12288 + net.ipv4.neigh.default.gc_thresh3=16384 + EOF + systemctl restart systemd-sysctl.service + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + mkdir -p /etc/containerd/certs.d/docker.io + cat <<EOF >/etc/containerd/certs.d/docker.io/hosts.toml + server = "https://registry-1.docker.io" + [host."http://registry"] + capabilities = ["pull", "resolve"] + skip_verify = true + EOF +--- +# Source: karpenter_nodes/templates/nodeclass.yaml +apiVersion: karpenter.k8s.aws/v1beta1 +kind: EC2NodeClass +metadata: + name: "nodes-workers-amd64" +spec: + role: eks_nodes_role + amiFamily: AL2 + amiSelectorTerms: + subnetSelectorTerms: + - tags: + Name: eks-dev-eu-west-1a + - tags: + Name: eks-dev-eu-west-1b + - tags: + Name: eks-dev-eu-west-1c + securityGroupSelectorTerms: + - tags: + Name: eks-nodes + - tags: + Name: eks-dev + tags: + cluster: eks-dev + nodegroup: nodes-workers + component: eks-karpenter-nodes + created_by: helm + team: devops + managed_by: karpenter + blockDeviceMappings: + - deviceName: /dev/xvda + ebs: + deleteOnTermination: true + encrypted: true + iops: 3000 + throughput: 125 + volumeSize: 100Gi + volumeType: gp3 + detailedMonitoring: false + associatePublicIPAddress: false + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + userData: | + CLUSTER_NAME=eks-dev + INSTANCEGROUP=nodes-workers + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` + 
ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-5,6)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + ## Disable IPv6 + cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + EOF + ## Stolen from this guy: https://blog.codeship.com/running-1000-containers-in-docker-swarm/ + cat <<EOF > /etc/sysctl.d/99-kube-net.conf + # Have a larger connection range available + net.ipv4.ip_local_port_range=1024 65000 + # Reuse closed sockets faster + net.ipv4.tcp_tw_reuse=1 + net.ipv4.tcp_fin_timeout=15 + # The maximum number of "backlogged sockets". Default is 128. + net.core.somaxconn=4096 + net.core.netdev_max_backlog=4096 + # 16MB per socket - which sounds like a lot, + # but will virtually never consume that much. + net.core.rmem_max=16777216 + net.core.wmem_max=16777216 + # Various network tunables + net.ipv4.tcp_max_syn_backlog=20480 + net.ipv4.tcp_max_tw_buckets=400000 + net.ipv4.tcp_no_metrics_save=1 + net.ipv4.tcp_rmem=4096 87380 16777216 + net.ipv4.tcp_syn_retries=2 + net.ipv4.tcp_synack_retries=2 + net.ipv4.tcp_wmem=4096 65536 16777216 + #vm.min_free_kbytes=65536 + # Connection tracking to prevent dropped connections (usually issue on LBs) + net.netfilter.nf_conntrack_max=262144 + net.ipv4.netfilter.ip_conntrack_generic_timeout=120 + net.netfilter.nf_conntrack_tcp_timeout_established=86400 + # ARP cache settings for a highly loaded docker swarm + net.ipv4.neigh.default.gc_thresh1=8096 + net.ipv4.neigh.default.gc_thresh2=12288 + net.ipv4.neigh.default.gc_thresh3=16384 + EOF + systemctl restart systemd-sysctl.service + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + mkdir -p /etc/containerd/certs.d/docker.io + cat <<EOF >/etc/containerd/certs.d/docker.io/hosts.toml + server = "https://registry-1.docker.io" + [host."http://registry"] + capabilities = ["pull", "resolve"] + skip_verify = true + EOF +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: "nodes-canary-amd64" +spec: + template: + metadata: + labels: + nodegroup: nodes-canary + cluster: eks-dev + spec: + nodeClassRef: + name: nodes-canary-amd64 + taints: + - key: dedicated + value: canary + effect: NoSchedule + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + - m + - r + - c + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "3" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - spot + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - 
m6ad + - r6ad + - m5ad + - r5ad + - r5ad + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + - key: capacity-spread + operator: In + values: + - "1" + - "2" + - "3" + - "4" + - "5" + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: 720h + consolidationPolicy: WhenUnderutilized + weight: 1 +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: "nodes-default-amd64" +spec: + template: + metadata: + labels: + nodegroup: nodes-default + cluster: eks-dev + spec: + nodeClassRef: + name: nodes-default-amd64 + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + - m + - r + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "3" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - spot + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + - key: capacity-spread + operator: In + values: + - "1" + - "2" + - "3" + - "4" + - "5" + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: 720h + consolidationPolicy: WhenUnderutilized + weight: 2 +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: "nodes-default-od-amd64" +spec: + template: + metadata: + labels: + nodegroup: nodes-default + cluster: eks-dev + spec: + nodeClassRef: + name: nodes-default-amd64 + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + - m + - r + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "4" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + 
values: + - amd64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - on-demand + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + - key: capacity-spread + operator: In + values: + - "6" + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: 720h + consolidationPolicy: WhenUnderutilized + weight: 1 +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: "nodes-gpu-amd64" +spec: + template: + metadata: + labels: + nodegroup: nodes-gpu + cluster: eks-dev + gpu: true + spec: + nodeClassRef: + name: nodes-gpu-amd64 + taints: + - key: dedicated + value: gpu + effect: NoSchedule + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + - g + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "3" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - spot + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + - key: node.kubernetes.io/instance-type + operator: In + values: + - g5.xlarge + - g5.2xlarge + - g5.4xlarge + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: 720h + consolidationPolicy: WhenUnderutilized + limits: + cpu: 128 + weight: 1 +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: "nodes-ingress-multiarch" +spec: + template: + metadata: + labels: + nodegroup: nodes-ingress + cluster: eks-dev + spec: + nodeClassRef: + name: nodes-ingress-multiarch + taints: + - key: dedicated + value: ingress + effect: NoSchedule + requirements: + - key: 
"karpenter.k8s.aws/instance-category" + operator: In + values: + - m + - r + - c + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "3" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + - arm64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - on-demand + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: Never + consolidationPolicy: WhenUnderutilized + weight: 1 +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: "nodes-jobs-amd64" +spec: + template: + metadata: + labels: + nodegroup: nodes-jobs + cluster: eks-dev + spec: + nodeClassRef: + name: nodes-jobs-amd64 + taints: + - key: dedicated + value: jobs + effect: NoSchedule + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + - m + - r + - c + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "3" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - on-demand + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: Never + consolidationPolicy: WhenEmpty + consolidateAfter: 5m + weight: 1 +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool 
+metadata: + name: "nodes-monitoring-multiarch" +spec: + template: + metadata: + labels: + nodegroup: nodes-monitoring + cluster: eks-dev + prometheus-scrape: true + spec: + nodeClassRef: + name: nodes-monitoring-multiarch + taints: + - key: dedicated + value: monitoring + effect: NoSchedule + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + - m + - r + - c + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "3" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + - arm64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - on-demand + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - x1 + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: Never + consolidationPolicy: WhenUnderutilized + weight: 1 +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: "nodes-workers-amd64" +spec: + template: + metadata: + labels: + nodegroup: nodes-workers + cluster: eks-dev + spec: + nodeClassRef: + name: nodes-workers-amd64 + taints: + - key: dedicated + effect: NoSchedule + value: nodes_workers + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + - m + - r + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "3" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - spot + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + - key: capacity-spread + operator: In + values: + - "1" + - "2" + - "3" + - "4" + - "5" + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + 
imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: 720h + consolidationPolicy: WhenUnderutilized + weight: 2 +--- +# Source: karpenter_nodes/templates/nodepool.yaml +apiVersion: karpenter.sh/v1beta1 +kind: NodePool +metadata: + name: "nodes-workers-c-amd64" +spec: + template: + metadata: + labels: + nodegroup: nodes-workers + cluster: eks-dev + spec: + nodeClassRef: + name: nodes-workers-amd64 + taints: + - key: dedicated + effect: NoSchedule + value: nodes_workers + requirements: + - key: "karpenter.k8s.aws/instance-category" + operator: In + values: + - c + - key: "karpenter.k8s.aws/instance-cpu" + operator: In + values: + - "4" + - "8" + - "12" + - "16" + - "24" + - "32" + - "48" + - key: karpenter.k8s.aws/instance-generation + operator: Gt + values: + - "3" + - key: "topology.kubernetes.io/zone" + operator: In + values: + - eu-west-1a + - eu-west-1b + - eu-west-1c + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + - key: "karpenter.sh/capacity-type" + operator: In + values: + - spot + - key: kubernetes.io/os + operator: In + values: + - linux + - key: "karpenter.k8s.aws/instance-family" + operator: NotIn + values: + - c6a + - m6a + - r6a + - c5a + - m5a + - r5a + - c6ad + - m6ad + - r6ad + - m5ad + - r5ad + - r5ad + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: + - metal + - key: capacity-spread + operator: In + values: + - "1" + - "2" + - "3" + - "4" + - "5" + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s + disruption: + expireAfter: 720h + consolidationPolicy: WhenUnderutilized + weight: 1 diff --git a/charts/karpenter_nodes/examples/userdata.yaml b/charts/karpenter_nodes/examples/userdata.yaml new file mode 100644 index 0000000..9a30a5f --- /dev/null +++ b/charts/karpenter_nodes/examples/userdata.yaml @@ -0,0 +1,67 @@ +registry: "registry-1.docker.io" +registryCache: "true" +registryHost: "http://registry" + +userData: | + CLUSTER_NAME={{ $.Values.clusterName }} + INSTANCEGROUP={{ .value.nodeGroupLabel | default .key }} + INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` + ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-5,6)}'` + HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" + hostname $HOSTNAME + echo $HOSTNAME > /etc/hostname + aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME + sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts + # Sysctl changes + ## Disable IPv6 + cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf + # disable ipv6 config + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 + net.ipv6.conf.lo.disable_ipv6 = 1 + EOF + ## Stolen from this guy: https://blog.codeship.com/running-1000-containers-in-docker-swarm/ + cat <<EOF > /etc/sysctl.d/99-kube-net.conf + # Have a larger connection range available + net.ipv4.ip_local_port_range=1024 65000 + # Reuse closed sockets faster + 
net.ipv4.tcp_tw_reuse=1 + net.ipv4.tcp_fin_timeout=15 + # The maximum number of "backlogged sockets". Default is 128. + net.core.somaxconn=4096 + net.core.netdev_max_backlog=4096 + # 16MB per socket - which sounds like a lot, + # but will virtually never consume that much. + net.core.rmem_max=16777216 + net.core.wmem_max=16777216 + # Various network tunables + net.ipv4.tcp_max_syn_backlog=20480 + net.ipv4.tcp_max_tw_buckets=400000 + net.ipv4.tcp_no_metrics_save=1 + net.ipv4.tcp_rmem=4096 87380 16777216 + net.ipv4.tcp_syn_retries=2 + net.ipv4.tcp_synack_retries=2 + net.ipv4.tcp_wmem=4096 65536 16777216 + #vm.min_free_kbytes=65536 + # Connection tracking to prevent dropped connections (usually issue on LBs) + net.netfilter.nf_conntrack_max=262144 + net.ipv4.netfilter.ip_conntrack_generic_timeout=120 + net.netfilter.nf_conntrack_tcp_timeout_established=86400 + # ARP cache settings for a highly loaded docker swarm + net.ipv4.neigh.default.gc_thresh1=8096 + net.ipv4.neigh.default.gc_thresh2=12288 + net.ipv4.neigh.default.gc_thresh3=16384 + EOF + systemctl restart systemd-sysctl.service + #Increase RegistryQPS + echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + {{- if eq ( .value.registryCache | default $.Values.registryCache ) "true" }} + mkdir -p /etc/containerd/certs.d/docker.io + cat <<EOF >/etc/containerd/certs.d/docker.io/hosts.toml + server = "https://registry-1.docker.io" + [host."{{ .value.registryHost | default $.Values.registryHost }}"] + capabilities = ["pull", "resolve"] + skip_verify = true + EOF + {{- end }} diff --git a/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json b/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json index c2ad88f..014fd68 100644 --- a/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json +++ b/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json @@ -74,7 +74,7 @@ "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, - "iteration": 1712641791200, + "iteration": 1713189648192, "links": [], "liveNow": false, "panels": [ @@ -1863,7 +1863,8 @@ "value": 80 } ] - } + }, + "unit": "short" }, "overrides": [] }, @@ -1873,6 +1874,219 @@ "x": 0, "y": 55 }, + "id": 57, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(karpenter_nodes_total_pod_requests{nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"}) by (nodepool)", + "instant": false, + "interval": "", + "legendFormat": "[{{nodepool}}] Pods", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(karpenter_nodes_total_daemon_requests{nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"}) by (nodepool)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "[{{nodepool}}] DaemonSets", + "refId": "C" + } + ], + "title": "CPU Requests for Pods and DaemonSets", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 55 + }, + "id": 59, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(karpenter_nodes_total_pod_requests{nodepool=~\"$NODEPOOL\",resource_type=\"memory\"}) by (nodepool)", + "instant": false, + "interval": "", + "legendFormat": "[{{nodepool}}] Pods", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(karpenter_nodes_total_daemon_requests{nodepool=~\"$NODEPOOL\",resource_type=\"memory\"}) by (nodepool)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "[{{nodepool}}] DaemonSets", + "refId": "C" + } + ], + "title": "Memory Requests for Pods and DaemonSets", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 63 + }, "id": 35, "options": { "legend": { @@ -1967,7 +2181,7 @@ "h": 8, "w": 12, "x": 12, - "y": 55 + "y": 63 }, "id": 30, "options": { @@ -2063,7 +2277,7 @@ "h": 8, "w": 12, "x": 0, - "y": 63 + "y": 71 }, "id": 45, "options": { @@ -2158,7 +2372,7 @@ "h": 8, "w": 12, "x": 12, - "y": 63 + "y": 71 }, "id": 46, "options": { @@ -2282,4 +2496,4 @@ "uid": "5DCs2Z-Sk", "version": 2, "weekStart": "" -} \ No newline at end of file +} diff --git a/charts/karpenter_nodes/nodegroups_example_values.yaml b/charts/karpenter_nodes/nodegroups_example_values.yaml deleted file mode 100644 index d60cf2a..0000000 --- a/charts/karpenter_nodes/nodegroups_example_values.yaml +++ /dev/null @@ -1,74 +0,0 @@ -excludeInstanceSize: - - metal -excludeFamilies: - - c6a - - m6a - - r6a - - c5a - - m5a - - r5a - - c6ad - - m6ad - - r6ad - - m5ad - - r5ad - - r5ad - -nodeGroups: - nodes-default: - blockDeviceMappings: - - deviceName: /dev/xvdb - ebs: - volumeSize: 100Gi - volumeType: gp3 - encrypted: false - deleteOnTermination: true - iops: 3000 - 
throughput: 125 - instances: {} - nodeHeadRooms: - - size: small - count: 2 - antiAffinitySpec: - - key: testlabel1 - operator: Exists - labels: - testlabel1: label1 - testlabel2: label2 - taints: - - key: testtaint1 - effect: noSchedule - value: taint1 - - key: testtaint2 - effect: noSchedule - value: taint2 - nodes-workers: - consolidation: "false" - blockDeviceMappings: - - deviceName: /dev/xvda - ebs: - volumeSize: 100Gi - volumeType: gp3 - encrypted: false - deleteOnTermination: true - iops: 3000 - throughput: 125 - instances: - architecture: "arm64" - minGeneration: 5 - categories: - - t - - x - cores: - - "4" - capacityType: - - on-demand - subnets: - - workers-1a - - workers-1b - - workers-1c - securityGroups: - - workersonly - availabilityZones: - - eu-west-1a - - eu-west-1b diff --git a/charts/karpenter_nodes/templates/nodepool.yaml b/charts/karpenter_nodes/templates/nodepool.yaml index 427c43a..ab45797 100644 --- a/charts/karpenter_nodes/templates/nodepool.yaml +++ b/charts/karpenter_nodes/templates/nodepool.yaml @@ -28,13 +28,20 @@ spec: {{- else }} name: {{ $k }}-{{ $v.instances.architecture | default $.Values.instances.architecture }} {{- end }} - {{- if hasKey $v "taints" }} + {{- if or (hasKey $v "taints") ($v.autoTaint | default $.Values.autoTaint) }} taints: + {{- if $v.autoTaint | default $.Values.autoTaint }} + - key: dedicated + effect: NoSchedule + value: {{ ($v.nodeGroupLabel | default $k) | replace "-" "_" }} + {{- end }} + {{- if hasKey $v "taints" }} {{- range $v.taints }} - key: {{ .key }} value: {{ .value }} effect: {{ .effect }} {{- end }} + {{- end }} {{- end }} {{- if hasKey $v "startupTaints" }} startupTaints: diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml index 6b9aa48..473ef5f 100644 --- a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml +++ b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml @@ -36,7 +36,7 @@ tests: value: taint1 - equal: path: spec.template.spec.taints[0].effect - value: noSchedule + value: NoSchedule - equal: path: spec.template.spec.taints[1].key value: testtaint2 @@ -45,7 +45,7 @@ tests: value: taint2 - equal: path: spec.template.spec.taints[1].effect - value: noSchedule + value: NoSchedule - it: Verify nodes-default requirements diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml index 9ae4919..b75f104 100644 --- a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml +++ b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml @@ -38,7 +38,7 @@ tests: value: taint1 - equal: path: spec.template.spec.startupTaints[0].effect - value: noSchedule + value: NoSchedule - equal: path: spec.template.spec.startupTaints[1].key value: testtaint2 @@ -47,7 +47,17 @@ tests: value: taint2 - equal: path: spec.template.spec.startupTaints[1].effect - value: noSchedule + value: NoSchedule + - equal: + path: spec.template.spec.taints[0].key + value: dedicated + - equal: + path: spec.template.spec.taints[0].value + value: nodes_workers + - equal: + path: spec.template.spec.taints[0].effect + value: NoSchedule + - it: Verify nodes-workers requirements documentIndex: 2 diff --git a/charts/karpenter_nodes/tests/values.yaml b/charts/karpenter_nodes/tests/values.yaml index d1273dc..878d36a 100644 --- a/charts/karpenter_nodes/tests/values.yaml +++ b/charts/karpenter_nodes/tests/values.yaml @@ -13,10 +13,10 @@ nodeGroups: testlabel2: label2 
taints: - key: testtaint1 - effect: noSchedule + effect: NoSchedule value: taint1 - key: testtaint2 - effect: noSchedule + effect: NoSchedule value: taint2 excludeFamilies: - c6a @@ -123,11 +123,12 @@ nodeGroups: memory: "384Gi" startupTaints: - key: testtaint1 - effect: noSchedule + effect: NoSchedule value: taint1 - key: testtaint2 - effect: noSchedule + effect: NoSchedule value: taint2 + autoTaint: true #Default cluster Settings clusterName: "eks-dev" diff --git a/charts/karpenter_nodes/userdata_example_values.yaml b/charts/karpenter_nodes/userdata_example_values.yaml deleted file mode 100644 index b41dd9a..0000000 --- a/charts/karpenter_nodes/userdata_example_values.yaml +++ /dev/null @@ -1,45 +0,0 @@ -registry: "registry-1.docker.io" -registryCache: true -registryHost: "" #"http://registry" -registrySkipVerify: true -disableIPv6: true - -nodeGroups: - nodes-default: - instances: {} -userData: | - CLUSTER_NAME={{ $.Values.clusterName }} - INSTANCEGROUP={{ .value.nodeGroupLabel | default .key }} - INSTANCE_ID=`/usr/bin/ec2-metadata --instance-id | awk '{print $2}'` - ID_SUFFIX=`/usr/bin/ec2-metadata --instance-id | awk '{print substr($0,length-3,4)}'` - HOSTNAME="${CLUSTER_NAME}-${INSTANCEGROUP}-$ID_SUFFIX" - hostname $HOSTNAME - echo $HOSTNAME > /etc/hostname - aws ec2 create-tags --resources $INSTANCE_ID --tags=Key=Name,Value=$HOSTNAME - sed -i "s/127.0.0.1 [0-9a-z-]*\s*localhost/127.0.0.1 $HOSTNAME localhost/" /etc/hosts - # Sysctl changes - {{ if ( .disableIPv6 | default $.Values.disableIPv6) }} - ## Disable IPv6 - cat <<EOF > /etc/sysctl.d/10-disable-ipv6.conf - # disable ipv6 config - net.ipv6.conf.all.disable_ipv6 = 1 - net.ipv6.conf.default.disable_ipv6 = 1 - net.ipv6.conf.lo.disable_ipv6 = 1 - systemctl restart systemd-sysctl.service - EOF - {{ end }} - #Increase RegistryQPS - echo "$(jq '.registryPullQPS=100' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json - echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json - {{ if (.value.registryCache | default $.Values.registryCache) }} - #Registry Cache - mkdir -p /etc/containerd/certs.d/{{ .value.registry | default $.Values.registry }} - cat <<EOF >/etc/containerd/certs.d/{{ .value.registry | default $.Values.registry }}/hosts.toml - server = "https://{{ .value.registry | default $.Values.registry }}" - [host."{{ .value.registryHost | default $.Values.registryHost }}"] - capabilities = ["pull", "resolve"] - {{- if .value.registrySkipVerify | default $.Values.registrySkipVerify }} - skip_verify = true - {{- end }} - EOF - {{- end }} diff --git a/charts/karpenter_nodes/values.yaml b/charts/karpenter_nodes/values.yaml index 526c7c1..f82fce5 100644 --- a/charts/karpenter_nodes/values.yaml +++ b/charts/karpenter_nodes/values.yaml @@ -81,6 +81,7 @@ instances: operatingSystems: - linux +autoTaint: false ## Exclude weak types of AMD instances # excludeFamilies: # - c6a