Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support of robot machine #1405

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions data.tf
Original file line number Diff line number Diff line change
@@ -1,10 +1,3 @@
data "github_release" "hetzner_ccm" {
count = var.hetzner_ccm_version == null ? 1 : 0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this being removed? It is important, as it always grabs the latest and greatest.

repository = "hcloud-cloud-controller-manager"
owner = "hetznercloud"
retrieve_by = "latest"
}

data "github_release" "hetzner_csi" {
count = var.hetzner_csi_version == null && !var.disable_hetzner_csi ? 1 : 0
repository = "csi-driver"
Expand Down
10 changes: 6 additions & 4 deletions init.tf
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ resource "null_resource" "kustomization" {
local.nginx_values,
local.haproxy_values,
local.calico_values,
local.ccm_values,
local.cilium_values,
local.longhorn_values,
local.csi_driver_smb_values,
Expand All @@ -137,6 +138,8 @@ resource "null_resource" "kustomization" {
coalesce(var.traefik_version, "N/A"),
coalesce(var.nginx_version, "N/A"),
coalesce(var.haproxy_version, "N/A"),
# Only a hash of the Robot credentials goes into the trigger string: a changed
# credential still re-triggers the resource, but the plaintext values are not
# copied into the triggers. coalesce keeps the original "N/A" fallback for
# empty or null values.
sha256(coalesce(var.hcloud_robot_user, "N/A")),
sha256(coalesce(var.hcloud_robot_password, "N/A")),
])
options = join("\n", [
for option, value in local.kured_options : "${option}=${value}"
Expand Down Expand Up @@ -198,9 +201,8 @@ resource "null_resource" "kustomization" {
content = templatefile(
"${path.module}/templates/ccm.yaml.tpl",
{
cluster_cidr_ipv4 = var.cluster_ipv4_cidr
default_lb_location = var.load_balancer_location
using_klipper_lb = local.using_klipper_lb
version = var.hetzner_ccm_version
values = indent(4, trimspace(local.ccm_values))
})
destination = "/var/post_install/ccm.yaml"
}
Expand Down Expand Up @@ -315,7 +317,7 @@ resource "null_resource" "kustomization" {
provisioner "remote-exec" {
inline = [
"set -ex",
"kubectl -n kube-system create secret generic hcloud --from-literal=token=${var.hcloud_token} --from-literal=network=${data.hcloud_network.k3s.name} --dry-run=client -o yaml | kubectl apply -f -",
# Creates/updates the "hcloud" secret with the API token, network name and Robot credentials.
# The Robot user and password are free-form strings, so they are wrapped in single quotes
# (embedded single quotes escaped as '\'') to keep shell metacharacters from breaking the command.
"kubectl -n kube-system create secret generic hcloud --from-literal=token=${var.hcloud_token} --from-literal=network=${data.hcloud_network.k3s.name} --from-literal=robot-user='${replace(var.hcloud_robot_user, "'", "'\\''")}' --from-literal=robot-password='${replace(var.hcloud_robot_password, "'", "'\\''")}' --dry-run=client -o yaml | kubectl apply -f -",
"kubectl -n kube-system create secret generic hcloud-csi --from-literal=token=${var.hcloud_token} --dry-run=client -o yaml | kubectl apply -f -",
]
}
Expand Down
20 changes: 20 additions & 0 deletions kube.tf.example
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ module "kube-hetzner" {
}
hcloud_token = var.hcloud_token != "" ? var.hcloud_token : local.hcloud_token

# To add bare-metal (Robot) support, supply the Robot credentials and a vswitch_id.
hcloud_robot_password = var.hcloud_robot_password
# This is your customer number, e.g. K1059261220 (not an email address)
hcloud_robot_user = var.hcloud_robot_user
# Because vSwitch resources are limited in number and slow to cancel,
# you need to create the vSwitch yourself:
# https://docs.hetzner.com/robot/dedicated-server/network/vswitch
# https://docs.hetzner.com/cloud/networks/connect-dedi-vswitch
vswitch_id = null

# Then fill or edit the below values. Only the first values starting with a * are obligatory; the rest can remain with their default values, or you
# could adapt them to your needs.

Expand Down Expand Up @@ -1078,3 +1088,13 @@ variable "hcloud_token" {
sensitive = true
default = ""
}

# Hetzner Robot user, forwarded to the module as hcloud_robot_user.
# Marked sensitive; defaults to an empty string.
variable "hcloud_robot_user" {
sensitive = true
default = ""
}

# Hetzner Robot password, forwarded to the module as hcloud_robot_password.
# Marked sensitive; defaults to an empty string.
variable "hcloud_robot_password" {
sensitive = true
default = ""
}
41 changes: 38 additions & 3 deletions locals.tf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
locals {

Check failure on line 1 in locals.tf

View workflow job for this annotation

GitHub Actions / Check formatting of terraform files

File is not in canonical format (terraform fmt)
# ssh_agent_identity is not set if the private key is passed directly, but if ssh agent is used, the public key tells ssh agent which private key to use.
# For terraforms provisioner.connection.agent_identity, we need the public key as a string.
ssh_agent_identity = var.ssh_private_key == null ? var.ssh_public_key : null
Expand All @@ -6,11 +6,11 @@
# If passed, a key already registered within hetzner is used.
# Otherwise, a new one will be created by the module.
hcloud_ssh_key_id = var.hcloud_ssh_key_id == null ? hcloud_ssh_key.k3s[0].id : var.hcloud_ssh_key_id
# Robot (bare-metal) support is active only when the Robot user, the Robot password and a
# vSwitch id are all provided. var.vswitch_id defaults to null, so both null and "" must
# count as "not provided"; otherwise the vswitch subnet would be created without a vswitch_id.
using_hcloud_robot = var.hcloud_robot_user != "" && var.hcloud_robot_password != "" && var.vswitch_id != null && var.vswitch_id != ""

# if given as a variable, we want to use the given token. This is needed to restore the cluster
k3s_token = var.k3s_token == null ? random_password.k3s_token.result : var.k3s_token

ccm_version = var.hetzner_ccm_version != null ? var.hetzner_ccm_version : data.github_release.hetzner_ccm[0].release_tag
csi_version = length(data.github_release.hetzner_csi) == 0 ? var.hetzner_csi_version : data.github_release.hetzner_csi[0].release_tag
kured_version = var.kured_version != null ? var.kured_version : data.github_release.kured[0].release_tag
calico_version = length(data.github_release.calico) == 0 ? var.calico_version : data.github_release.calico[0].release_tag
Expand Down Expand Up @@ -74,7 +74,7 @@
kind = "Kustomization"
resources = concat(
[
"https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/download/${local.ccm_version}/ccm-networks.yaml",
"ccm.yaml",
"https://github.com/kubereboot/kured/releases/download/${local.kured_version}/kured-${local.kured_version}-dockerhub.yaml",
"https://raw.githubusercontent.com/rancher/system-upgrade-controller/9e7e45c1bdd528093da36be1f1f32472469005e6/manifests/system-upgrade-controller.yaml",
],
Expand Down Expand Up @@ -436,6 +436,38 @@
kube_controller_manager_arg = "flex-volume-plugin-dir=/var/lib/kubelet/volumeplugins"
flannel_iface = "eth1"

# Helm values for the hcloud-cloud-controller-manager chart, rendered into the HelmChart's
# valuesContent by templates/ccm.yaml.tpl. A non-empty var.ccm_values replaces these
# defaults entirely.
# NOTE(review): HCLOUD_DEBUG is switched on together with Robot support — confirm this is
# meant to ship and is not a debugging leftover.
ccm_values = var.ccm_values != "" ? var.ccm_values : <<EOT
args:
leader-elect: "false"
allocate-node-cidrs: "true"
%{if local.using_klipper_lb~}
secure-port: 10288
%{endif~}
networking:
enabled: true
clusterCIDR: ${var.cluster_ipv4_cidr}
env:
HCLOUD_LOAD_BALANCERS_LOCATION:
value: "${var.load_balancer_location}"
HCLOUD_LOAD_BALANCERS_USE_PRIVATE_IP:
value: "true"
HCLOUD_LOAD_BALANCERS_ENABLED:
value: "${!local.using_klipper_lb}"
HCLOUD_LOAD_BALANCERS_DISABLE_PRIVATE_INGRESS:
value: "true"
%{if local.using_hcloud_robot~}
# see https://github.com/hetznercloud/hcloud-cloud-controller-manager/issues/630#issuecomment-2039136344
HCLOUD_NETWORK_ROUTES_ENABLED:
value: "false"
HCLOUD_DEBUG:
value: "1"
%{endif~}
robot:
enabled: ${local.using_hcloud_robot}
EOT

cilium_values = var.cilium_values != "" ? var.cilium_values : <<EOT
# Enable Kubernetes host-scope IPAM mode (required for K3s + Hetzner CCM)
ipam:
Expand Down Expand Up @@ -470,7 +502,10 @@

loadBalancer:
# Enable LoadBalancer & NodePort XDP Acceleration (direct routing (routingMode=native) is recommended to achieve optimal performance)
acceleration: native
# vSwitch used to connect bare metal host does not support XDP, so we use
# best-effort to make cilium to run accross vSwitch
# https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/#loadbalancer-nodeport-xdp-acceleration
acceleration: best-effort

bpf:
# Enable eBPF-based Masquerading ("The eBPF-based implementation is the most efficient implementation")
Expand Down
20 changes: 20 additions & 0 deletions main.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
provider "hetzner-robot" {

Check failure on line 1 in main.tf

View workflow job for this annotation

GitHub Actions / Check formatting of terraform files

File is not in canonical format (terraform fmt)
username = var.hcloud_robot_user
password = var.hcloud_robot_password
}

resource "random_password" "k3s_token" {
length = 48
special = false
Expand Down Expand Up @@ -27,12 +32,27 @@
name = var.cluster_name
ip_range = var.network_ipv4_cidr
labels = local.labels
expose_routes_to_vswitch = local.using_hcloud_robot
}

# resource "hetzner-robot_vswitch" "k3s" {
# count = local.using_hcloud_robot && var.vswitch_id == null ? 1 : 0
# vlan = var.vlan_id
# }

data "hcloud_network" "k3s" {
id = local.use_existing_network ? var.existing_network_id[0] : hcloud_network.k3s[0].id
}

# Attaches the vSwitch given by var.vswitch_id to the cluster network as a subnet of
# type "vswitch". Created only when local.using_hcloud_robot is true.
# NOTE(review): subnet index 100 looks like a slot reserved for the vSwitch — confirm it
# cannot collide with the control-plane/agent nodepool subnets.
resource "hcloud_network_subnet" "vswitch" {
count = local.using_hcloud_robot ? 1 : 0
network_id = data.hcloud_network.k3s.id
type = "vswitch"
network_zone = var.network_region
ip_range = local.network_ipv4_subnets[100]
vswitch_id = var.vswitch_id
}

# We start from the end of the subnets cidr array,
# as we would have fewer control plane nodepools, than agent ones.
resource "hcloud_network_subnet" "control_plane" {
Expand Down
19 changes: 19 additions & 0 deletions robot.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
locals {

Check failure on line 1 in robot.tf

View workflow job for this annotation

GitHub Actions / Check formatting of terraform files

File is not in canonical format (terraform fmt)
# Server numbers keyed by their string form, so the map can drive for_each.
server_numbers_map = { for number in var.server_numbers : tostring(number) => number }
}

# Looks up one Robot server per entry in local.server_numbers_map.
# each.key is the server number in string form (the map key built with tostring()).
data "hetzner-robot_server" "servers" {
for_each = local.server_numbers_map
server_number = each.key
}

# Exposes the IP, name and status of every looked-up Robot server,
# keyed by the same string server number used in the data source's for_each.
output "servers_info" {
value = {
for k, v in data.hetzner-robot_server.servers :
k => {
ip = v.server_ip
name = v.server_name
status = v.status
}
}
}
38 changes: 10 additions & 28 deletions templates/ccm.yaml.tpl
Original file line number Diff line number Diff line change
@@ -1,32 +1,14 @@
---
apiVersion: apps/v1
kind: Deployment
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
name: hcloud-cloud-controller-manager
name: hccm
namespace: kube-system
spec:
template:
spec:
containers:
- name: hcloud-cloud-controller-manager
command:
- "/bin/hcloud-cloud-controller-manager"
- "--cloud-provider=hcloud"
- "--leader-elect=false"
- "--allow-untagged-cloud"
- "--allocate-node-cidrs=true"
- "--cluster-cidr=${cluster_cidr_ipv4}"
- "--webhook-secure-port=0"
%{if using_klipper_lb~}
- "--secure-port=10288"
%{endif~}
env:
- name: "HCLOUD_LOAD_BALANCERS_LOCATION"
value: "${default_lb_location}"
- name: "HCLOUD_LOAD_BALANCERS_USE_PRIVATE_IP"
value: "true"
- name: "HCLOUD_LOAD_BALANCERS_ENABLED"
value: "${!using_klipper_lb}"
- name: "HCLOUD_LOAD_BALANCERS_DISABLE_PRIVATE_INGRESS"
value: "true"

chart: hcloud-cloud-controller-manager
repo: https://charts.hetzner.cloud
version: "${version}"
targetNamespace: kube-system
bootstrap: true
valuesContent: |-
${values}
7 changes: 7 additions & 0 deletions values-export.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# Writes the effective CCM Helm values (local.ccm_values) to ccm_values.yaml
# when var.export_values is true; the file is created with permission 600.
resource "local_file" "ccm_values" {
count = var.export_values ? 1 : 0
content = local.ccm_values
filename = "ccm_values.yaml"
file_permission = "600"
}

resource "local_file" "cilium_values" {
count = var.export_values && var.cni_plugin == "cilium" ? 1 : 0
content = local.cilium_values
Expand Down
37 changes: 35 additions & 2 deletions variables.tf
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
variable "hcloud_token" {

Check failure on line 1 in variables.tf

View workflow job for this annotation

GitHub Actions / Check formatting of terraform files

File is not in canonical format (terraform fmt)
description = "Hetzner Cloud API Token."
type = string
sensitive = true
}

variable "hcloud_robot_user" {
description = "Hetzner Robot User."
type = string
sensitive = true
default = ""
}

variable "hcloud_robot_password" {
description = "Hetzner Robot Password."
type = string
sensitive = true
default = ""
}

variable "k3s_token" {
description = "k3s master token (must match when restoring a cluster)."
type = string
Expand Down Expand Up @@ -74,6 +88,19 @@
type = string
default = "eu-central"
}

variable "vswitch_id" {
description = "vSwitch to connect to cloud."
type = string
default = null
}

variable "server_numbers" {
type = list(number)
description = "List of Hetzner server numbers"
default = []
}

variable "existing_network_id" {
# Unfortunately, we need this to be a list or null. If we only use a plain
# string here, and check that existing_network_id is null, terraform will
Expand Down Expand Up @@ -347,7 +374,7 @@

variable "hetzner_ccm_version" {
type = string
default = null
default = ""
description = "Version of Kubernetes Cloud Controller Manager for Hetzner Cloud."
}

Expand Down Expand Up @@ -679,6 +706,12 @@
}
}

variable "ccm_values" {
type = string
default = ""
description = "Additional helm values file to pass to hcloud-cloud-controller-manager as 'valuesContent' at the HelmChart."
}

variable "cilium_values" {
type = string
default = ""
Expand All @@ -687,7 +720,7 @@

variable "cilium_version" {
type = string
default = "1.15.1"
default = "1.15.7"
description = "Version of Cilium."
}

Expand Down
4 changes: 4 additions & 0 deletions versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,9 @@ terraform {
source = "tenstad/remote"
version = ">= 0.1.2"
}
hetzner-robot = {
source = "strng-solutions/hetzner-robot"
version = "3.4.0"
}
}
}