diff --git a/kubernetes_cluster/01_main.tf b/kubernetes_cluster/01_main.tf index c2ab6467..923750c2 100644 --- a/kubernetes_cluster/01_main.tf +++ b/kubernetes_cluster/01_main.tf @@ -76,6 +76,7 @@ resource "azurerm_kubernetes_cluster" "this" { workload_identity_enabled = var.workload_identity_enabled oidc_issuer_enabled = local.oidc_issuer_enabled + cost_analysis_enabled = var.cost_analysis_enabled dynamic "network_profile" { for_each = var.network_profile != null ? [var.network_profile] : [] diff --git a/kubernetes_cluster/99_variables.tf b/kubernetes_cluster/99_variables.tf index 17e77921..d5f3daba 100644 --- a/kubernetes_cluster/99_variables.tf +++ b/kubernetes_cluster/99_variables.tf @@ -311,6 +311,14 @@ variable "addon_azure_pod_identity_enabled" { default = false } +# The sku_tier must be set to Standard or Premium to enable this feature. +# Enabling this will add Kubernetes Namespace and Deployment details to the Cost Analysis views in the Azure portal. +variable "cost_analysis_enabled" { + type = bool + default = false + description = "(Optional) Should cost analysis be enabled for this Kubernetes Cluster? Defaults to false." +} + # # 📄 Logs # diff --git a/kubernetes_cluster/README.md b/kubernetes_cluster/README.md index 63de4bee..2eff6b92 100644 --- a/kubernetes_cluster/README.md +++ b/kubernetes_cluster/README.md @@ -564,6 +564,7 @@ No modules. | [addon\_azure\_policy\_enabled](#input\_addon\_azure\_policy\_enabled) | Should the Azure Policy addon be enabled for this Node Pool? | `bool` | `false` | no | | [alerts\_enabled](#input\_alerts\_enabled) | Should Metrics Alert be enabled? | `bool` | `true` | no | | [automatic\_channel\_upgrade](#input\_automatic\_channel\_upgrade) | (Optional) The upgrade channel for this Kubernetes Cluster. Possible values are patch, rapid, node-image and stable. Omitting this field sets this value to none. 
| `string` | `null` | no | +| [cost\_analysis\_enabled](#input\_cost\_analysis\_enabled) | (Optional) Should cost analysis be enabled for this Kubernetes Cluster? Defaults to false. | `bool` | `false` | no | | [custom\_logs\_alerts](#input\_custom\_logs\_alerts) | Map of name = criteria objects |
map(object({| `{}` | no | | [custom\_metric\_alerts](#input\_custom\_metric\_alerts) | Map of name = criteria objects |
# (Optional) Specifies the display name of the alert rule.
display_name = string
# (Optional) Specifies the description of the scheduled query rule.
description = string
# Log query, written in Kusto Query Language (KQL), that the rule evaluates.
query = string
# (Required) Severity of the alert. Should be an integer between 0 and 4.
# A value of 0 is the most severe.
severity = number
# (Required) Specifies the period of time in ISO 8601 duration format on
# which the Scheduled Query Rule will be executed (bin size).
# If evaluation_frequency is PT1M, possible values are PT1M, PT5M, PT10M,
# PT15M, PT30M, PT45M, PT1H, PT2H, PT3H, PT4H, PT5H, and PT6H. Otherwise,
# possible values are PT5M, PT10M, PT15M, PT30M, PT45M, PT1H, PT2H, PT3H,
# PT4H, PT5H, PT6H, P1D, and P2D.
window_duration = optional(string)
# (Optional) How often the scheduled query rule is evaluated, represented
# in ISO 8601 duration format. Possible values are PT1M, PT5M, PT10M, PT15M,
# PT30M, PT45M, PT1H, PT2H, PT3H, PT4H, PT5H, PT6H, P1D.
evaluation_frequency = string
# Evaluation operation for rule - 'GreaterThan', 'GreaterThanOrEqual',
# 'LessThan', or 'LessThanOrEqual'.
operator = string
# Result or count threshold based on which rule should be triggered.
# Values must be between 0 and 10000 inclusive.
threshold = number
# (Required) The type of aggregation to apply to the data points in
# aggregation granularity. Possible values are Average, Count, Maximum,
# Minimum, and Total.
time_aggregation_method = string
# (Optional) Specifies the column containing the resource ID. The content
# of the column must be a URI formatted as a resource ID.
resource_id_column = optional(string)
# (Optional) Specifies the column containing the metric measure number.
metric_measure_column = optional(string)
dimension = list(object(
{
# (Required) Name of the dimension.
name = string
# (Required) Operator for dimension values. Possible values are
# Exclude and Include.
operator = string
# (Required) List of dimension values. Use a wildcard * to collect all.
values = list(string)
}
))
# (Required) Specifies the number of violations to trigger an alert.
# Should be smaller or equal to number_of_evaluation_periods.
# Possible value is integer between 1 and 6.
minimum_failing_periods_to_trigger_alert = number
# (Required) Specifies the number of aggregated look-back points.
# The look-back time window is calculated based on the aggregation
# granularity window_duration and the selected number of aggregated points.
# Possible value is integer between 1 and 6.
number_of_evaluation_periods = number
# (Optional) Specifies the flag that indicates whether the alert should
# be automatically resolved or not. Value should be true or false.
# The default is false.
auto_mitigation_enabled = optional(bool)
# (Optional) Specifies the flag which indicates whether this scheduled
# query rule checks whether storage is configured. Value should be true or false.
# The default is false.
workspace_alerts_storage_enabled = optional(bool)
# (Optional) Specifies the flag which indicates whether the provided
# query should be validated or not. The default is false.
skip_query_validation = optional(bool)
}))
map(object({| `{}` | no | | [default\_metric\_alerts](#input\_default\_metric\_alerts) | Map of name = criteria objects |
# criteria.*.aggregation to be one of [Average Count Minimum Maximum Total]
aggregation = string
# "Insights.Container/pods" "Insights.Container/nodes"
metric_namespace = string
metric_name = string
# criteria.0.operator to be one of [Equals NotEquals GreaterThan GreaterThanOrEqual LessThan LessThanOrEqual]
operator = string
threshold = number
# Possible values are PT1M, PT5M, PT15M, PT30M and PT1H
frequency = string
# Possible values are PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H and P1D.
window_size = string
# Skip metrics validation
skip_metric_validation = optional(bool, false)
dimension = list(object(
{
name = string
operator = string
values = list(string)
}
))
}))
map(object({|
# criteria.*.aggregation to be one of [Average Count Minimum Maximum Total]
aggregation = string
# (Optional) Specifies the description of the scheduled metric rule.
description = optional(string)
# "Insights.Container/pods" "Insights.Container/nodes"
metric_namespace = string
metric_name = string
# criteria.0.operator to be one of [Equals NotEquals GreaterThan GreaterThanOrEqual LessThan LessThanOrEqual]
operator = string
threshold = number
# Possible values are 0, 1, 2, 3 and 4. Defaults to 3.
severity = optional(number)
# Possible values are PT1M, PT5M, PT15M, PT30M and PT1H
frequency = string
# Possible values are PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H and P1D.
window_size = string
# Skip metrics validation
skip_metric_validation = optional(bool, false)
dimension = list(object(
{
name = string
operator = string
values = list(string)
}
))
}))
{| no | diff --git a/kubernetes_opencosts/00_data.tf b/kubernetes_opencosts/00_data.tf new file mode 100644 index 00000000..4d815099 --- /dev/null +++ b/kubernetes_opencosts/00_data.tf @@ -0,0 +1,11 @@ + +data "azurerm_kubernetes_cluster" "aks" { + name = var.aks_name + resource_group_name = var.aks_rg_name +} + +data "kubernetes_namespace" "monitoring" { + metadata { + name = var.kubernetes_namespace + } +} diff --git a/kubernetes_opencosts/01_main.tf b/kubernetes_opencosts/01_main.tf new file mode 100644 index 00000000..e1231a5b --- /dev/null +++ b/kubernetes_opencosts/01_main.tf @@ -0,0 +1,119 @@ +locals { + env_short = substr(var.env, 0, 1) + location = data.azurerm_kubernetes_cluster.aks.location +} + +resource "azurerm_role_definition" "open_cost_role" { + name = "${var.project}-${local.env_short}-${local.location}-OpenCostRole" + scope = data.azurerm_subscription.current.id + description = "Rate Card query role" + permissions { + actions = [ + "Microsoft.Compute/virtualMachines/vmSizes/read", + "Microsoft.Resources/subscriptions/locations/read", + "Microsoft.Resources/providers/read", + "Microsoft.ContainerService/containerServices/read", + "Microsoft.Commerce/RateCard/read" + ] + not_actions = [] + } + assignable_scopes = [ + data.azurerm_subscription.current.id + ] +} + +# Create an Azure User-Assigned Managed Identity (UAMI) +resource "azurerm_user_assigned_identity" "opencost_identity" { + name = "${var.project}-${local.env_short}-${local.location}-opencost-managed-identity" + location = local.location + resource_group_name = data.azurerm_kubernetes_cluster.aks.resource_group_name +} + +# Assign role to UAMI +resource "azurerm_role_assignment" "opencost_identity_role" { + principal_id = azurerm_user_assigned_identity.opencost_identity.principal_id + role_definition_name = azurerm_role_definition.open_cost_role.name + scope = data.azurerm_subscription.current.id +} + +# Identity Details +output "managed_identity_details" { + description = "Dettagli 
dell'identità gestita User-Assigned per OpenCost" + value = jsonencode({ + identity_id = azurerm_user_assigned_identity.opencost_identity.id + principal_id = azurerm_user_assigned_identity.opencost_identity.principal_id + client_id = azurerm_user_assigned_identity.opencost_identity.client_id + subscription = data.azurerm_subscription.current.id + tenant = data.azurerm_client_config.current.tenant_id + }) +} + +# Kubernetes Secret configs and identity +resource "kubernetes_secret" "azure_managed_identity_refs" { + metadata { + name = "azure-managed-identity" + namespace = data.kubernetes_namespace.monitoring.metadata[0].name + } + + data = { + "client-id" = azurerm_user_assigned_identity.opencost_identity.client_id + "principal-id" = azurerm_user_assigned_identity.opencost_identity.principal_id + "identity-id" = azurerm_user_assigned_identity.opencost_identity.id + "tenant-id" = data.azurerm_client_config.current.tenant_id + } + + type = "Opaque" +} + +# # Helm deployment for "prometheus-opencost-exporter" +resource "helm_release" "prometheus_opencost_exporter" { + name = "prometheus-opencost-exporter" + namespace = data.kubernetes_namespace.monitoring.metadata[0].name + chart = "prometheus-opencost-exporter" + repository = "https://prometheus-community.github.io/helm-charts" + version = "0.1.1" # Adjust the version as needed + + # Set additional values for the Helm chart if required + set { + name = "extraVolumes[0].name" + value = "azure-managed-identity-secret" + } + + set { + name = "extraVolumes[0].secret.secretName" + value = kubernetes_secret.azure_managed_identity_refs.metadata[0].name + } + + set { + name = "opencost.exporter.extraVolumeMounts[0].mountPath" + value = "/var/secrets" + } + + set { + name = "opencost.exporter.extraVolumeMounts[0].name" + value = "azure-managed-identity-secret" + } + + set { + name = "opencost.prometheus.external.url" + value = var.prometheus_config.external_url + } + + set { + name = 
"opencost.prometheus.internal.namespaceName" + value = var.prometheus_config.namespace + } + set { + name = "opencost.prometheus.internal.port" + value = var.prometheus_config.service_port + } + set { + name = "opencost.prometheus.internal.serviceName" + value = var.prometheus_config.service_name + } + + set { + name = "metrics.serviceMonitor.enabled" + value = "true" + } +} diff --git a/kubernetes_opencosts/99_variables.tf b/kubernetes_opencosts/99_variables.tf new file mode 100644 index 00000000..404e430e --- /dev/null +++ b/kubernetes_opencosts/99_variables.tf @@ -0,0 +1,59 @@ +variable "project" { + type = string + default = "cstar" + validation { + condition = ( + length(var.project) <= 6 + ) + error_message = "Max length is 6 chars." + } +} + +variable "env" { + type = string + validation { + condition = ( + length(var.env) <= 3 + ) + error_message = "Max length is 3 chars." + } +} + +# AKS Variables +################### + +variable "aks_name" { + type = string + description = "(Required) Name of AKS cluster in Azure" +} + +variable "aks_rg_name" { + type = string + description = "(Required) Name of AKS cluster resource group in Azure" +} + +variable "kubernetes_namespace" { + type = string + default = "monitoring" +} + +# Prometheus variables +######################## + +variable "prometheus_config" { + type = object({ + service_port = string + external_url = optional(string, "") + namespace = string + service_name = string + chart_version = optional(string, "1.42.3") + }) + description = "Configuration object for Prometheus deployment, including chart version, optional external URL, namespace, service name, service port, and other related settings." 
+ default = { + namespace = "monitoring" + service_name = "prometheus-service" + service_port = 9090 + chart_version = "1.42.3" + external_url = "" + } +} diff --git a/kubernetes_opencosts/99_versions.tf b/kubernetes_opencosts/99_versions.tf new file mode 100644 index 00000000..50903178 --- /dev/null +++ b/kubernetes_opencosts/99_versions.tf @@ -0,0 +1,27 @@ +terraform { + required_version = ">= 1.3.0" + + required_providers { + helm = { + source = "hashicorp/helm" + version = ">= 2.0.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "<= 2.33.0" + } + azurerm = { + source = "hashicorp/azurerm" + version = "<= 3.116.0" + } + } +} + +provider "azurerm" { + features {} + # Configuration options +} + +data "azurerm_subscription" "current" {} + +data "azurerm_client_config" "current" {} diff --git a/kubernetes_opencosts/README.md b/kubernetes_opencosts/README.md new file mode 100644 index 00000000..65b6fa6e --- /dev/null +++ b/kubernetes_opencosts/README.md @@ -0,0 +1,60 @@ +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3.0 | +| [azurerm](#requirement\_azurerm) | <= 3.116.0 | +| [helm](#requirement\_helm) | 2.16.1 | +| [kubernetes](#requirement\_kubernetes) | <= 2.33.0 | + +## Providers + +| Name | Version | +|------|---------| +| [azurerm](#provider\_azurerm) | 3.116.0 | +| [helm](#provider\_helm) | 2.16.1 | +| [kubernetes](#provider\_kubernetes) | 2.33.0 | + +## Modules + +No modules. 
+ +## Resources + +| Name | Type | +|------|------| +| [azurerm_role_assignment.opencost_identity_role](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource | +| [azurerm_role_definition.open_cost_role](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_definition) | resource | +| [azurerm_user_assigned_identity.opencost_identity](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/user_assigned_identity) | resource | +| [helm_release.opencost](https://registry.terraform.io/providers/hashicorp/helm/2.16.1/docs/resources/release) | resource | +| [helm_release.prometheus_opencost_exporter](https://registry.terraform.io/providers/hashicorp/helm/2.16.1/docs/resources/release) | resource | +| [kubernetes_secret.azure_managed_identity_refs](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/secret) | resource | +| [azurerm_client_config.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/client_config) | data source | +| [azurerm_kubernetes_cluster.aks](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/kubernetes_cluster) | data source | +| [azurerm_subscription.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/subscription) | data source | +| [kubernetes_namespace.monitoring](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/data-sources/namespace) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [aks\_name](#input\_aks\_name) | (Required) Name of AKS cluster in Azure | `string` | n/a | yes | +| [aks\_rg\_name](#input\_aks\_rg\_name) | (Required) Name of AKS cluster resource group in Azure | `string` | n/a | yes | +| [domain](#input\_domain) | n/a | `string` | n/a | yes | +| [env](#input\_env) | n/a | 
`string` | n/a | yes | +| [env\_short](#input\_env\_short) | n/a | `string` | n/a | yes | +| [k8s\_kube\_config\_path\_prefix](#input\_k8s\_kube\_config\_path\_prefix) | n/a | `string` | `"~/.kube"` | no | +| [kubernetes\_namespace](#input\_kubernetes\_namespace) | n/a | `string` | `"monitoring"` | no | +| [location](#input\_location) | n/a | `string` | n/a | yes | +| [location\_short](#input\_location\_short) | Location short like eg: weu, weu.. | `string` | n/a | yes | +| [prefix](#input\_prefix) | n/a | `string` | `"cstar"` | no | +| [prometheus\_chart\_version](#input\_prometheus\_chart\_version) | (Optional) The prometheus chart version to use. | `string` | `"1.42.3"` | no | +| [prometheus\_namespace](#input\_prometheus\_namespace) | (Required) The prometheus namespace. | `string` | n/a | yes | +| [prometheus\_service\_name](#input\_prometheus\_service\_name) | (Required) The prometheus service name. | `string` | n/a | yes | +| [prometheus\_service\_port](#input\_prometheus\_service\_port) | (Required) The prometheus service port. | `string` | n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [managed\_identity\_details](#output\_managed\_identity\_details) | Dettagli dell'identità gestita User-Assigned per OpenCost |
"node_cpu_usage_percentage": {
"aggregation": "Average",
"description": "High node cpu usage",
"dimension": [
{
"name": "node",
"operator": "Include",
"values": [
"*"
]
}
],
"frequency": "PT15M",
"metric_name": "node_cpu_usage_percentage",
"metric_namespace": "Microsoft.ContainerService/managedClusters",
"operator": "GreaterThan",
"severity": 2,
"threshold": 80,
"window_size": "PT1H"
},
"node_memory_working_set_percentage": {
"aggregation": "Average",
"description": "High node memory usage",
"dimension": [
{
"name": "node",
"operator": "Include",
"values": [
"*"
]
}
],
"frequency": "PT15M",
"metric_name": "node_memory_working_set_percentage",
"metric_namespace": "Microsoft.ContainerService/managedClusters",
"operator": "GreaterThan",
"severity": 2,
"threshold": 80,
"window_size": "PT1H"
},
"pods_failed": {
"aggregation": "Average",
"description": "Pod state phase failed",
"dimension": [
{
"name": "phase",
"operator": "Include",
"values": [
"Failed"
]
},
{
"name": "namespace",
"operator": "Include",
"values": [
"*"
]
}
],
"frequency": "PT15M",
"metric_name": "kube_pod_status_phase",
"metric_namespace": "Microsoft.ContainerService/managedClusters",
"operator": "GreaterThan",
"severity": 1,
"threshold": 0,
"window_size": "PT1H"
}
}