From 6fb58a222953e2a8b80950c7e4f61b864d84449e Mon Sep 17 00:00:00 2001 From: Maximilien Cuony Date: Tue, 20 Jan 2026 09:38:31 +0100 Subject: [PATCH 1/2] [monitoring] Expose prometheus as TCP services --- .../dss/templates/_networking-google.tpl | 3 + .../templates/prometheus-loadbalancers.yaml | 103 +++--------------- deploy/services/tanka/metadata_base.libsonnet | 1 - deploy/services/tanka/prometheus.libsonnet | 26 ++++- 4 files changed, 36 insertions(+), 97 deletions(-) diff --git a/deploy/services/helm-charts/dss/templates/_networking-google.tpl b/deploy/services/helm-charts/dss/templates/_networking-google.tpl index a9cdc412f..09764ba9f 100644 --- a/deploy/services/helm-charts/dss/templates/_networking-google.tpl +++ b/deploy/services/helm-charts/dss/templates/_networking-google.tpl @@ -1,3 +1,6 @@ +{{- define "google-lb-default-annotations" -}} +{{- end -}} + {{- define "google-lb-crdb-annotations" -}} {{- end -}} diff --git a/deploy/services/helm-charts/dss/templates/prometheus-loadbalancers.yaml b/deploy/services/helm-charts/dss/templates/prometheus-loadbalancers.yaml index c06cb7d46..90dd46033 100644 --- a/deploy/services/helm-charts/dss/templates/prometheus-loadbalancers.yaml +++ b/deploy/services/helm-charts/dss/templates/prometheus-loadbalancers.yaml @@ -3,113 +3,36 @@ {{- if $.Values.monitoring.enabled }} {{- if $.Values.monitoring.externalService.enabled }} -{{- if eq $cloudProvider "google" }} - ---- -apiVersion: cloud.google.com/v1 -kind: BackendConfig -metadata: - name: prometheus-external -spec: - securityPolicy: - name: "{{ $.Values.monitoring.externalService.allowedIPsPolicy }}" - --- apiVersion: v1 kind: Service -metadata: - labels: - app: {{$.Release.Name}}-prometheus - name: {{$.Release.Name}}-prometheus-external - annotations: - cloud.google.com/backend-config: '{"default": "prometheus-external"}' - name: {{$.Release.Name}}-prometheus-external -spec: - ports: - - name: prometheus - port: 9090 - targetPort: 9090 - publishNotReadyAddresses: true - selector: - app.kubernetes.io/instance: "{{$.Release.Name}}" - app.kubernetes.io/name: "prometheus" - type: ClusterIP - ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress metadata: annotations: - {{- include (printf "%s-ingress-prometheus-annotations" $cloudProvider) + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + {{- include (printf "%s-lb-default-annotations" $cloudProvider) (dict - "certName" (printf "%s-prometheus-https-certificate" $.Release.Name) + "name" "prometheus-external" "ip" $.Values.monitoring.externalService.ip - "frontendConfig" (empty .sslPolicy | ternary "" "ssl-frontend-config") + "subnet" $.Values.monitoring.externalService.subnet + "cloudProvider" $cloudProvider ) | nindent 4 }} labels: - name: {{$.Release.Name}}-prometheus-https-ingress - name: {{$.Release.Name}}-prometheus-https-ingress -spec: - {{- include (printf "%s-ingress-spec" $cloudProvider) . | nindent 2 }} - rules: - - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: {{$.Release.Name}}-prometheus-external - port: - number: 9090 - ---- -apiVersion: networking.gke.io/v1 -kind: ManagedCertificate -metadata: - labels: - name: {{$.Release.Name}}-prometheus-https-certificate - name: {{$.Release.Name}}-prometheus-https-certificate -spec: - domains: - - {{ $.Values.monitoring.externalService.hostname }} - -{{- else }} - ---- -apiVersion: v1 -kind: Service -metadata: - annotations: - {{- include (printf "%s-ingress-prometheus-annotations" $cloudProvider) - (merge $.Values.monitoring.externalService - (dict - "name" "prometheus-external" - "cloudProvider" $cloudProvider - ) - ) | nindent 4 - }} - labels: - app: {{$.Release.Name}}-prometheus - name: {{$.Release.Name}}-prometheus-external - name: {{$.Release.Name}}-prometheus-external + app: prometheus + name: prometheus-external + name: prometheus-external + namespace: default spec: {{- include (printf "%s-lb-spec" $cloudProvider) (dict "ip" $.Values.monitoring.externalService.ip) | nindent 2}} - loadBalancerSourceRanges: -{{- range $i, $ip := $.Values.monitoring.externalService.allowedIPs }} - - {{$ip}} -{{- end }} ports: - - name: prometheus - port: 443 + - name: prometheus-external + port: 9090 targetPort: 9090 publishNotReadyAddresses: true selector: - app.kubernetes.io/instance: "{{$.Release.Name}}" - app.kubernetes.io/name: "prometheus" - type: LoadBalancer + app.kubernetes.io/name: prometheus -{{- end }} + type: LoadBalancer {{- end }} {{- end }} diff --git a/deploy/services/tanka/metadata_base.libsonnet b/deploy/services/tanka/metadata_base.libsonnet index 646eda16c..1cf23151b 100644 --- a/deploy/services/tanka/metadata_base.libsonnet +++ b/deploy/services/tanka/metadata_base.libsonnet @@ -75,7 +75,6 @@ image: 'prom/prometheus:v3.8.1', expose_external: false, IP: '', // This is the static external ip address for promethus ingress, leaving blank means your cloud provider will assign an ephemeral IP - whitelist_ip_ranges: error 'must specify whitelisted CIDR IP Blocks, or empty list for fully public access', retention: '15d', storage_size: '100Gi', storageClass: 'standard', diff --git a/deploy/services/tanka/prometheus.libsonnet b/deploy/services/tanka/prometheus.libsonnet index 5cce0831b..5e1dd79d4 100644 --- a/deploy/services/tanka/prometheus.libsonnet +++ b/deploy/services/tanka/prometheus.libsonnet @@ -29,16 +29,30 @@ local PrometheusWebConfig(metadata) = { } }; -local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-external') { - app:: 'prometheus', +local googleExternalLB(metadata, name, ip) = base.Service(metadata, name) { port:: 9090, + app:: 'prometheus', spec+: { type: 'LoadBalancer', - loadBalancerIP: metadata.prometheus.IP, - loadBalancerSourceRanges: metadata.prometheus.whitelist_ip_ranges - } + loadBalancerIP: ip, + }, +}; + +local awsExternalLB(metadata, name, ip) = base.AWSLoadBalancer(metadata, name, [ip], metadata.subnet) { + port:: 9090, + app:: 'prometheus', +}; + +local minikubeExternalLB(metadata, name, ip) = base.Service(metadata, name) { + port:: 9090, + app:: 'prometheus', }; +local externalLB(metadata, name, ip) = + if metadata.cloud_provider == "google" then googleExternalLB(metadata, name, ip) + else if metadata.cloud_provider == "aws" then awsExternalLB(metadata, name, ip) + else if metadata.cloud_provider == "minikube" then minikubeExternalLB(metadata, name, ip); + { all(metadata) : { clusterRole: base.ClusterRole(metadata, 'prometheus') { @@ -231,7 +245,7 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e ], }, }, - externalService: if metadata.prometheus.expose_external == true then PrometheusExternalService(metadata), + externalLB: if metadata.prometheus.expose_external == true then externalLB(metadata, "prometheus", metadata.prometheus.IP), internalService: base.Service(metadata, 'prometheus-service') { app:: 'prometheus', port:: 9090, From 04862a5be9900e7bf02e3993a4aefac693cf492e Mon Sep 17 00:00:00 2001 From: Maximilien Cuony Date: Tue, 20 Jan 2026 12:05:43 +0100 Subject: [PATCH 2/2] [terraform] Add support for monitoring features --- .../terraform-aws-kubernetes/network_dns.tf | 11 ++++++++ .../terraform-aws-kubernetes/network_lb.tf | 10 +++++++ .../terraform-aws-kubernetes/output.tf | 14 ++++++++++ .../terraform-aws-kubernetes/variables.gen.tf | 14 ++++++++++ .../terraform-commons-dss/helm.tf | 26 +++++++++++++++++-- .../terraform-commons-dss/tanka.tf | 2 ++ .../templates/main.jsonnet.tmp | 2 ++ .../terraform-commons-dss/variables.gen.tf | 25 ++++++++++++++++++ .../variables_internal.tf | 5 ++++ .../terraform-google-kubernetes/cluster.tf | 11 ++++++++ .../terraform-google-kubernetes/dns.tf | 10 +++++++ .../terraform-google-kubernetes/output.tf | 4 +++ .../variables.gen.tf | 14 ++++++++++ .../modules/terraform-aws-dss/TFVARS.gen.md | 11 ++++++++ .../modules/terraform-aws-dss/main.tf | 4 +++ .../terraform.dev.example.tfvars | 4 +++ .../terraform-aws-dss/variables.gen.tf | 25 ++++++++++++++++++ .../terraform-google-dss/TFVARS.gen.md | 11 ++++++++ .../modules/terraform-google-dss/main.tf | 4 +++ .../terraform.dev.example.tfvars | 4 +++ .../terraform-google-dss/variables.gen.tf | 25 ++++++++++++++++++ .../utils/definitions/enable_monitoring.tf | 9 +++++++ .../utils/definitions/prometheus_hostname.tf | 12 +++++++++ deploy/infrastructure/utils/variables.py | 22 ++++++++++++---- deploy/operations/ci/aws-1/variables.gen.tf | 25 ++++++++++++++++++ 25 files changed, 297 insertions(+), 7 deletions(-) create mode 100644 deploy/infrastructure/utils/definitions/enable_monitoring.tf create mode 100644 deploy/infrastructure/utils/definitions/prometheus_hostname.tf diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf index 5310e3ba1..0134d496a 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf @@ -61,3 +61,14 @@ resource "aws_route53_record" "yugabyte_tserver_hostnames" { ttl = 300 records = [each.value] } + +# Public prometheus DNS +resource "aws_route53_record" "prometheus_hostname" { + count = var.prometheus_hostname == "" ? 0 : 1 + + zone_id = var.aws_route53_zone_id + name = var.prometheus_hostname + type = "A" + ttl = 300 + records = [aws_eip.ip_prometheus[count.index].public_ip] +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf index 277eaf99b..a93a0852d 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf @@ -93,3 +93,13 @@ resource "aws_eip" "ip_yugabyte" { ExpectedTServerDNS = format("%s.tserver.%s", count.index, var.db_hostname_suffix) } } + +resource "aws_eip" "ip_prometheus" { + domain = "vpc" + count = var.prometheus_hostname == "" ? 0 : 1 + + tags = { + Name = format("%s-ip-prometheus", var.cluster_name) + ExpectedDNS = var.prometheus_hostname + } +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf index 87f19ebea..15a4acd22 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf @@ -18,6 +18,10 @@ output "ip_gateway" { value = aws_eip.gateway[0].id } +output "ip_prometheus" { + value = length(aws_eip.ip_prometheus) > 0 ? aws_eip.ip_prometheus[0].id : "" +} + output "crdb_nodes" { value = [ for i in aws_eip.ip_crdb : { @@ -82,6 +86,16 @@ output "gateway_address" { } } +output "prometheus_address" { + value = length(aws_eip.ip_prometheus) > 0 ? { + expected_dns : aws_eip.ip_prometheus[0].tags.ExpectedDNS, + address : aws_eip.ip_prometheus[0].public_ip, + } : { + expected_dns : null, + address : null, + } +} + output "workload_subnet" { value = data.aws_subnet.main_subnet.id } diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf index f398fa6e5..93bc5d38f 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf @@ -107,6 +107,20 @@ variable "cluster_name" { EOT } +variable "prometheus_hostname" { + type = string + default = "" + description = <<-EOT + Domain used to expose prometheus on an external endpoint. + + Leave empty to disable exposition of prometheus publicly. + + Example: `prometheus.dss.example.com` + + EOT +} + + variable "kubernetes_version" { type = string description = <<-EOT diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf index 2e247483d..a5340345d 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf @@ -93,7 +93,18 @@ resource "local_file" "helm_chart_values" { } monitoring = { - enabled = false + enabled = var.enable_monitoring + externalService = var.prometheus_hostname != "" ? { + enabled = var.enable_monitoring + ip = var.ip_prometheus + subnet = var.workload_subnet + hostname = var.prometheus_hostname + } : { + enabled = false + ip = null + subnet = null + hostname = null + } }, prometheus = { @@ -289,7 +300,18 @@ resource "local_file" "helm_chart_values" { } monitoring = { - enabled = false + enabled = var.enable_monitoring + externalService = var.prometheus_hostname != "" ? { + enabled = var.enable_monitoring + ip = var.ip_prometheus + subnet = var.workload_subnet + hostname = var.prometheus_hostname + } : { + enabled = false + ip = null + subnet = null + hostname = null + } }, prometheus = { diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf index faa8ec0b0..d3c255c5c 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf @@ -51,6 +51,8 @@ resource "local_file" "tanka_config_main" { VAR_EVICT_RID_TTL = var.evict_rid_ttl VAR_EVICT_RID_ENABLE_ISAS = var.evict_rid_isas VAR_EVICT_RID_ENABLE_SUBSCRIPTIONS = var.evict_rid_subscriptions + VAR_PROMETHEUS_EXPOSE_EXTERNAL = var.prometheus_hostname != "" + VAR_PROMETHEUS_IP = var.ip_prometheus }) filename = "${local.tanka_workspace_location}/main.jsonnet" } diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp index cb9cd2015..fdd685a08 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp @@ -83,6 +83,8 @@ local metadata = metadataBase { }, prometheus+: { storageClass: '${VAR_STORAGE_CLASS}', + expose_external: ${VAR_PROMETHEUS_EXPOSE_EXTERNAL}, + IP: '${VAR_PROMETHEUS_IP}', }, image_pull_secret: '${VAR_DOCKER_IMAGE_PULL_SECRET}', cloud_provider: '${VAR_CLOUD_PROVIDER}', diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf index 8ddb417b8..c4a8f544a 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf @@ -64,6 +64,20 @@ variable "cluster_name" { EOT } +variable "prometheus_hostname" { + type = string + default = "" + description = <<-EOT + Domain used to expose prometheus on an external endpoint. + + Leave empty to disable exposition of prometheus publicly. + + Example: `prometheus.dss.example.com` + + EOT +} + + variable "image" { type = string description = <<-EOT @@ -456,3 +470,14 @@ variable "evict_rid_subscriptions" { } +variable "enable_monitoring" { + type = bool + default = false + description = <<-EOT + Set to true to enable monitoring stack with prometheus / grafana. + + Example: `true` + EOT +} + + diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf index d56ca25f1..a5233aa26 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf @@ -50,6 +50,11 @@ variable "ip_gateway" { description = "IP of the gateway used by the DSS service" } +variable "ip_prometheus" { + type = string + description = "IP of the gateway used by prometheus" +} + variable "kubernetes_storage_class" { type = string description = <<-EOT diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf index a18e67721..e34b06a7d 100644 --- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf +++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf @@ -70,6 +70,17 @@ resource "google_compute_address" "ip_yugabyte" { description = format("%s.%%s.%s", count.index, var.db_hostname_suffix) } +resource "google_compute_address" "ip_prometheus" { + + count = var.prometheus_hostname == "" ? 0 : 1 + + name = format("%s-ip-prometheus", var.cluster_name) + region = local.region + + # Current google terraform provider doesn't allow tags or labels. Description is used to preserve mapping between ips and hostnames. + description = var.prometheus_hostname +} + locals { kubectl_cluster_context_name = format("gke_%s_%s_%s", google_container_cluster.kubernetes_cluster.project, google_container_cluster.kubernetes_cluster.location, google_container_cluster.kubernetes_cluster.name) } diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf index da4d28a1d..cb7598f5c 100644 --- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf +++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf @@ -44,3 +44,13 @@ resource "google_dns_record_set" "yugabyte_tserver" { managed_zone = data.google_dns_managed_zone.default[0].name rrdatas = [google_compute_address.ip_yugabyte[count.index].address] } + +resource "google_dns_record_set" "prometheus_hostname" { + count = var.prometheus_hostname == "" || var.google_dns_managed_zone_name == "" ? 0 : 1 + name = "${google_compute_address.ip_prometheus[0].description}." # description contains the expected hostname + type = "A" + ttl = 300 + + managed_zone = data.google_dns_managed_zone.default[0].name + rrdatas = [google_compute_address.ip_prometheus[0].address] +} diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf index 028793f44..a1c0672d3 100644 --- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf +++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf @@ -67,3 +67,7 @@ output "yugabyte_tservers_nodes" { } ] } + +output "ip_prometheus" { + value = length(google_compute_address.ip_prometheus) > 0 ? google_compute_address.ip_prometheus[0].address : "" +} diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf index 430517e8c..847da1f43 100644 --- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf +++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf @@ -96,6 +96,20 @@ variable "cluster_name" { EOT } +variable "prometheus_hostname" { + type = string + default = "" + description = <<-EOT + Domain used to expose prometheus on an external endpoint. + + Leave empty to disable exposition of prometheus publicly. + + Example: `prometheus.dss.example.com` + + EOT +} + + variable "kubernetes_version" { type = string description = <<-EOT diff --git a/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md b/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md index 96bc9806f..2187a914b 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md +++ b/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md @@ -157,6 +157,11 @@ Use latest to use the latest schema version.

Use latest to use the latest schema version.

Example: 3.1.0


Default value: "latest" + + enable_monitoring (bool) +

Set to true to enable monitoring stack with prometheus / grafana.

+

Example: true

+
Default value: false enable_scd (bool)

Set this boolean true to enable ASTM strategic conflict detection functionality

@@ -249,6 +254,12 @@ DSS instances) value is acceptable.

Currently, only single node or three nodes deployments are supported.

Example: 3

+ + prometheus_hostname (string) +

Domain used to expose prometheus on an external endpoint.

+

Leave empty to disable exposition of prometheus publicly.

+

Example: prometheus.dss.example.com

+
Default value: "" should_init (bool)

Set to false if joining an existing pool, true if creating the first DSS instance diff --git a/deploy/infrastructure/modules/terraform-aws-dss/main.tf b/deploy/infrastructure/modules/terraform-aws-dss/main.tf index 85581914c..907e20725 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/main.tf +++ b/deploy/infrastructure/modules/terraform-aws-dss/main.tf @@ -10,6 +10,7 @@ module "terraform-aws-kubernetes" { aws_iam_permissions_boundary = var.aws_iam_permissions_boundary node_count = var.node_count kubernetes_version = var.kubernetes_version + prometheus_hostname = var.prometheus_hostname source = "../../dependencies/terraform-aws-kubernetes" } @@ -56,6 +57,9 @@ module "terraform-commons-dss" { evict_rid_ttl = var.evict_rid_ttl evict_rid_isas = var.evict_rid_isas evict_rid_subscriptions = var.evict_rid_subscriptions + enable_monitoring = var.enable_monitoring + prometheus_hostname = var.prometheus_hostname + ip_prometheus = module.terraform-aws-kubernetes.ip_prometheus source = "../../dependencies/terraform-commons-dss" } diff --git a/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars b/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars index 4ed5bce5f..76b30a8f5 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars +++ b/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars @@ -39,3 +39,7 @@ yugabyte_region = "aws-uss-1" yugabyte_zone = "aws-uss-1" yugabyte_light_resources = false yugabyte_external_nodes = [] + +# Monitoring +enable_monitoring = false +# prometheus_hostname = "prometheus.dss.interuss.example.com" diff --git a/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf b/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf index aa3e8e85c..50a59a3ad 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf +++ b/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf @@ -107,6 +107,20 @@ variable "cluster_name" { EOT } +variable "prometheus_hostname" { + type = string + default = "" + description = <<-EOT + Domain used to expose prometheus on an external endpoint. + + Leave empty to disable exposition of prometheus publicly. + + Example: `prometheus.dss.example.com` + + EOT +} + + variable "kubernetes_version" { type = string description = <<-EOT @@ -555,3 +569,14 @@ variable "evict_rid_subscriptions" { } +variable "enable_monitoring" { + type = bool + default = false + description = <<-EOT + Set to true to enable monitoring stack with prometheus / grafana. + + Example: `true` + EOT +} + + diff --git a/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md b/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md index cc3c44c1a..2a60c1491 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md +++ b/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md @@ -126,6 +126,11 @@ Use latest to use the latest schema version.

Use latest to use the latest schema version.

Example: 3.1.0


Default value: "latest" + + enable_monitoring (bool) +

Set to true to enable monitoring stack with prometheus / grafana.

+

Example: true

+
Default value: false enable_scd (bool)

Set this boolean true to enable ASTM strategic conflict detection functionality

@@ -245,6 +250,12 @@ DSS instances) value is acceptable.

Currently, only single node or three nodes deployments are supported.

Example: 3

+ + prometheus_hostname (string) +

Domain used to expose prometheus on an external endpoint.

+

Leave empty to disable exposition of prometheus publicly.

+

Example: prometheus.dss.example.com

+
Default value: "" should_init (bool)

Set to false if joining an existing pool, true if creating the first DSS instance diff --git a/deploy/infrastructure/modules/terraform-google-dss/main.tf b/deploy/infrastructure/modules/terraform-google-dss/main.tf index 88969912c..9888544f3 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/main.tf +++ b/deploy/infrastructure/modules/terraform-google-dss/main.tf @@ -10,6 +10,7 @@ module "terraform-google-kubernetes" { google_machine_type = var.google_machine_type node_count = var.node_count kubernetes_version = var.kubernetes_version + prometheus_hostname = var.prometheus_hostname source = "../../dependencies/terraform-google-kubernetes" } @@ -55,6 +56,9 @@ module "terraform-commons-dss" { evict_rid_ttl = var.evict_rid_ttl evict_rid_isas = var.evict_rid_isas evict_rid_subscriptions = var.evict_rid_subscriptions + enable_monitoring = var.enable_monitoring + prometheus_hostname = var.prometheus_hostname + ip_prometheus = module.terraform-google-kubernetes.ip_prometheus source = "../../dependencies/terraform-commons-dss" } diff --git a/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars b/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars index f4bed5658..6a0944d54 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars +++ b/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars @@ -40,3 +40,7 @@ yugabyte_region = "gcp-uss-1" yugabyte_zone = "gcp-uss-1" yugabyte_light_resources = false yugabyte_external_nodes = [] + +# Monitoring +enable_monitoring = false +# prometheus_hostname = "prometheus.dss.interuss.example.com" diff --git a/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf b/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf index 13d033401..c43e8f5ee 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf +++ b/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf @@ -96,6 +96,20 @@ variable "cluster_name" { EOT } +variable "prometheus_hostname" { + type = string + default = "" + description = <<-EOT + Domain used to expose prometheus on an external endpoint. + + Leave empty to disable exposition of prometheus publicly. + + Example: `prometheus.dss.example.com` + + EOT +} + + variable "kubernetes_version" { type = string description = <<-EOT @@ -544,3 +558,14 @@ variable "evict_rid_subscriptions" { } +variable "enable_monitoring" { + type = bool + default = false + description = <<-EOT + Set to true to enable monitoring stack with prometheus / grafana. + + Example: `true` + EOT +} + + diff --git a/deploy/infrastructure/utils/definitions/enable_monitoring.tf b/deploy/infrastructure/utils/definitions/enable_monitoring.tf new file mode 100644 index 000000000..de1c503d0 --- /dev/null +++ b/deploy/infrastructure/utils/definitions/enable_monitoring.tf @@ -0,0 +1,9 @@ +variable "enable_monitoring" { + type = bool + default = false + description = <<-EOT + Set to true to enable monitoring stack with prometheus / grafana. + + Example: `true` + EOT +} diff --git a/deploy/infrastructure/utils/definitions/prometheus_hostname.tf b/deploy/infrastructure/utils/definitions/prometheus_hostname.tf new file mode 100644 index 000000000..ec0179da5 --- /dev/null +++ b/deploy/infrastructure/utils/definitions/prometheus_hostname.tf @@ -0,0 +1,12 @@ +variable "prometheus_hostname" { + type = string + default = "" + description = <<-EOT + Domain used to expose prometheus on an external endpoint. + + Leave empty to disable exposition of prometheus publicly. + + Example: `prometheus.dss.example.com` + + EOT +} diff --git a/deploy/infrastructure/utils/variables.py b/deploy/infrastructure/utils/variables.py index 751ee6898..f63971f55 100755 --- a/deploy/infrastructure/utils/variables.py +++ b/deploy/infrastructure/utils/variables.py @@ -23,7 +23,14 @@ # Variables per project # For all */terraform-* -GLOBAL_VARIABLES = ["app_hostname", "db_hostname_suffix", "datastore_type", "node_count", "cluster_name"] +GLOBAL_VARIABLES = [ + "app_hostname", + "db_hostname_suffix", + "datastore_type", + "node_count", + "cluster_name", + "prometheus_hostname", +] # dependencies/terraform-commons-dss COMMONS_DSS_VARIABLES = GLOBAL_VARIABLES + [ @@ -55,6 +62,7 @@ "evict_rid_ttl", "evict_rid_isas", "evict_rid_subscriptions", + "enable_monitoring", ] # dependencies/terraform-*-kubernetes @@ -91,11 +99,13 @@ # modules/terraform-aws-dss AWS_MODULE_VARIABLES = ( - AWS_KUBERNETES_VARIABLES + [ + AWS_KUBERNETES_VARIABLES + + [ "aws_kubernetes_storage_class", "crdb_hostname_suffix", "crdb_locality", - ] + COMMONS_DSS_VARIABLES + ] + + COMMONS_DSS_VARIABLES ) PROJECT_VARIABLES = { @@ -246,7 +256,9 @@ def simplify_type(value_type): description, value_type, default_value = parse_definition(v, definitions[v]) formatted_value_type = f"{simplify_type(value_type)}" formatted_default_value = ( - f"
Default value: {default_value}" if default_value is not None else "" + f"
Default value: {default_value}" + if default_value is not None + else "" ) formatted_description = marko.convert(description) content += f""" @@ -262,7 +274,7 @@ def simplify_type(value_type): """.strip() if has_internal_vars: - content += f"## Internal variables\n\n" + content += "## Internal variables\n\n" content += f"This module requires additional variables, see [{INTERNAL_VARIABLES_FILENAME}](./{INTERNAL_VARIABLES_FILENAME}) for details" return content diff --git a/deploy/operations/ci/aws-1/variables.gen.tf b/deploy/operations/ci/aws-1/variables.gen.tf index aa3e8e85c..50a59a3ad 100644 --- a/deploy/operations/ci/aws-1/variables.gen.tf +++ b/deploy/operations/ci/aws-1/variables.gen.tf @@ -107,6 +107,20 @@ variable "cluster_name" { EOT } +variable "prometheus_hostname" { + type = string + default = "" + description = <<-EOT + Domain used to expose prometheus on an external endpoint. + + Leave empty to disable exposition of prometheus publicly. + + Example: `prometheus.dss.example.com` + + EOT +} + + variable "kubernetes_version" { type = string description = <<-EOT @@ -555,3 +569,14 @@ variable "evict_rid_subscriptions" { } +variable "enable_monitoring" { + type = bool + default = false + description = <<-EOT + Set to true to enable monitoring stack with prometheus / grafana. + + Example: `true` + EOT +} + +