From 6fb58a222953e2a8b80950c7e4f61b864d84449e Mon Sep 17 00:00:00 2001
From: Maximilien Cuony
Date: Tue, 20 Jan 2026 09:38:31 +0100
Subject: [PATCH 1/2] [monitoring] Expose prometheus as TCP services
---
.../dss/templates/_networking-google.tpl | 3 +
.../templates/prometheus-loadbalancers.yaml | 103 +++---------------
deploy/services/tanka/metadata_base.libsonnet | 1 -
deploy/services/tanka/prometheus.libsonnet | 26 ++++-
4 files changed, 36 insertions(+), 97 deletions(-)
diff --git a/deploy/services/helm-charts/dss/templates/_networking-google.tpl b/deploy/services/helm-charts/dss/templates/_networking-google.tpl
index a9cdc412f..09764ba9f 100644
--- a/deploy/services/helm-charts/dss/templates/_networking-google.tpl
+++ b/deploy/services/helm-charts/dss/templates/_networking-google.tpl
@@ -1,3 +1,6 @@
+{{- define "google-lb-default-annotations" -}}
+{{- end -}}
+
{{- define "google-lb-crdb-annotations" -}}
{{- end -}}
diff --git a/deploy/services/helm-charts/dss/templates/prometheus-loadbalancers.yaml b/deploy/services/helm-charts/dss/templates/prometheus-loadbalancers.yaml
index c06cb7d46..90dd46033 100644
--- a/deploy/services/helm-charts/dss/templates/prometheus-loadbalancers.yaml
+++ b/deploy/services/helm-charts/dss/templates/prometheus-loadbalancers.yaml
@@ -3,113 +3,36 @@
{{- if $.Values.monitoring.enabled }}
{{- if $.Values.monitoring.externalService.enabled }}
-{{- if eq $cloudProvider "google" }}
-
----
-apiVersion: cloud.google.com/v1
-kind: BackendConfig
-metadata:
- name: prometheus-external
-spec:
- securityPolicy:
- name: "{{ $.Values.monitoring.externalService.allowedIPsPolicy }}"
-
---
apiVersion: v1
kind: Service
-metadata:
- labels:
- app: {{$.Release.Name}}-prometheus
- name: {{$.Release.Name}}-prometheus-external
- annotations:
- cloud.google.com/backend-config: '{"default": "prometheus-external"}'
- name: {{$.Release.Name}}-prometheus-external
-spec:
- ports:
- - name: prometheus
- port: 9090
- targetPort: 9090
- publishNotReadyAddresses: true
- selector:
- app.kubernetes.io/instance: "{{$.Release.Name}}"
- app.kubernetes.io/name: "prometheus"
- type: ClusterIP
-
----
-apiVersion: networking.k8s.io/v1
-kind: Ingress
metadata:
annotations:
- {{- include (printf "%s-ingress-prometheus-annotations" $cloudProvider)
+ service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
+ {{- include (printf "%s-lb-default-annotations" $cloudProvider)
(dict
- "certName" (printf "%s-prometheus-https-certificate" $.Release.Name)
+ "name" "prometheus-external"
"ip" $.Values.monitoring.externalService.ip
- "frontendConfig" (empty .sslPolicy | ternary "" "ssl-frontend-config")
+ "subnet" $.Values.monitoring.externalService.subnet
+ "cloudProvider" $cloudProvider
) | nindent 4
}}
labels:
- name: {{$.Release.Name}}-prometheus-https-ingress
- name: {{$.Release.Name}}-prometheus-https-ingress
-spec:
- {{- include (printf "%s-ingress-spec" $cloudProvider) . | nindent 2 }}
- rules:
- - http:
- paths:
- - path: /
- pathType: Prefix
- backend:
- service:
- name: {{$.Release.Name}}-prometheus-external
- port:
- number: 9090
-
----
-apiVersion: networking.gke.io/v1
-kind: ManagedCertificate
-metadata:
- labels:
- name: {{$.Release.Name}}-prometheus-https-certificate
- name: {{$.Release.Name}}-prometheus-https-certificate
-spec:
- domains:
- - {{ $.Values.monitoring.externalService.hostname }}
-
-{{- else }}
-
----
-apiVersion: v1
-kind: Service
-metadata:
- annotations:
- {{- include (printf "%s-ingress-prometheus-annotations" $cloudProvider)
- (merge $.Values.monitoring.externalService
- (dict
- "name" "prometheus-external"
- "cloudProvider" $cloudProvider
- )
- ) | nindent 4
- }}
- labels:
- app: {{$.Release.Name}}-prometheus
- name: {{$.Release.Name}}-prometheus-external
- name: {{$.Release.Name}}-prometheus-external
+ app: prometheus
+ name: prometheus-external
+ name: prometheus-external
+ namespace: default
spec:
{{- include (printf "%s-lb-spec" $cloudProvider) (dict "ip" $.Values.monitoring.externalService.ip) | nindent 2}}
- loadBalancerSourceRanges:
-{{- range $i, $ip := $.Values.monitoring.externalService.allowedIPs }}
- - {{$ip}}
-{{- end }}
ports:
- - name: prometheus
- port: 443
+ - name: prometheus-external
+ port: 9090
targetPort: 9090
publishNotReadyAddresses: true
selector:
- app.kubernetes.io/instance: "{{$.Release.Name}}"
- app.kubernetes.io/name: "prometheus"
- type: LoadBalancer
+ app.kubernetes.io/name: prometheus
-{{- end }}
+ type: LoadBalancer
{{- end }}
{{- end }}
diff --git a/deploy/services/tanka/metadata_base.libsonnet b/deploy/services/tanka/metadata_base.libsonnet
index 646eda16c..1cf23151b 100644
--- a/deploy/services/tanka/metadata_base.libsonnet
+++ b/deploy/services/tanka/metadata_base.libsonnet
@@ -75,7 +75,6 @@
image: 'prom/prometheus:v3.8.1',
expose_external: false,
IP: '', // This is the static external ip address for promethus ingress, leaving blank means your cloud provider will assign an ephemeral IP
- whitelist_ip_ranges: error 'must specify whitelisted CIDR IP Blocks, or empty list for fully public access',
retention: '15d',
storage_size: '100Gi',
storageClass: 'standard',
diff --git a/deploy/services/tanka/prometheus.libsonnet b/deploy/services/tanka/prometheus.libsonnet
index 5cce0831b..5e1dd79d4 100644
--- a/deploy/services/tanka/prometheus.libsonnet
+++ b/deploy/services/tanka/prometheus.libsonnet
@@ -29,16 +29,30 @@ local PrometheusWebConfig(metadata) = {
}
};
-local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-external') {
- app:: 'prometheus',
+local googleExternalLB(metadata, name, ip) = base.Service(metadata, name) {
port:: 9090,
+ app:: 'prometheus',
spec+: {
type: 'LoadBalancer',
- loadBalancerIP: metadata.prometheus.IP,
- loadBalancerSourceRanges: metadata.prometheus.whitelist_ip_ranges
- }
+ loadBalancerIP: ip,
+ },
+};
+
+local awsExternalLB(metadata, name, ip) = base.AWSLoadBalancer(metadata, name, [ip], metadata.subnet) {
+ port:: 9090,
+ app:: 'prometheus',
+};
+
+local minikubeExternalLB(metadata, name, ip) = base.Service(metadata, name) {
+ port:: 9090,
+ app:: 'prometheus',
};
+local externalLB(metadata, name, ip) =
+ if metadata.cloud_provider == "google" then googleExternalLB(metadata, name, ip)
+ else if metadata.cloud_provider == "aws" then awsExternalLB(metadata, name, ip)
+ else if metadata.cloud_provider == "minikube" then minikubeExternalLB(metadata, name, ip);
+
{
all(metadata) : {
clusterRole: base.ClusterRole(metadata, 'prometheus') {
@@ -231,7 +245,7 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e
],
},
},
- externalService: if metadata.prometheus.expose_external == true then PrometheusExternalService(metadata),
+ externalLB: if metadata.prometheus.expose_external == true then externalLB(metadata, "prometheus", metadata.prometheus.IP),
internalService: base.Service(metadata, 'prometheus-service') {
app:: 'prometheus',
port:: 9090,
From 04862a5be9900e7bf02e3993a4aefac693cf492e Mon Sep 17 00:00:00 2001
From: Maximilien Cuony
Date: Tue, 20 Jan 2026 12:05:43 +0100
Subject: [PATCH 2/2] [terraform] Add support for monitoring features
---
.../terraform-aws-kubernetes/network_dns.tf | 11 ++++++++
.../terraform-aws-kubernetes/network_lb.tf | 10 +++++++
.../terraform-aws-kubernetes/output.tf | 14 ++++++++++
.../terraform-aws-kubernetes/variables.gen.tf | 14 ++++++++++
.../terraform-commons-dss/helm.tf | 26 +++++++++++++++++--
.../terraform-commons-dss/tanka.tf | 2 ++
.../templates/main.jsonnet.tmp | 2 ++
.../terraform-commons-dss/variables.gen.tf | 25 ++++++++++++++++++
.../variables_internal.tf | 5 ++++
.../terraform-google-kubernetes/cluster.tf | 11 ++++++++
.../terraform-google-kubernetes/dns.tf | 10 +++++++
.../terraform-google-kubernetes/output.tf | 4 +++
.../variables.gen.tf | 14 ++++++++++
.../modules/terraform-aws-dss/TFVARS.gen.md | 11 ++++++++
.../modules/terraform-aws-dss/main.tf | 4 +++
.../terraform.dev.example.tfvars | 4 +++
.../terraform-aws-dss/variables.gen.tf | 25 ++++++++++++++++++
.../terraform-google-dss/TFVARS.gen.md | 11 ++++++++
.../modules/terraform-google-dss/main.tf | 4 +++
.../terraform.dev.example.tfvars | 4 +++
.../terraform-google-dss/variables.gen.tf | 25 ++++++++++++++++++
.../utils/definitions/enable_monitoring.tf | 9 +++++++
.../utils/definitions/prometheus_hostname.tf | 12 +++++++++
deploy/infrastructure/utils/variables.py | 22 ++++++++++++----
deploy/operations/ci/aws-1/variables.gen.tf | 25 ++++++++++++++++++
25 files changed, 297 insertions(+), 7 deletions(-)
create mode 100644 deploy/infrastructure/utils/definitions/enable_monitoring.tf
create mode 100644 deploy/infrastructure/utils/definitions/prometheus_hostname.tf
diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf
index 5310e3ba1..0134d496a 100644
--- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf
+++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf
@@ -61,3 +61,14 @@ resource "aws_route53_record" "yugabyte_tserver_hostnames" {
ttl = 300
records = [each.value]
}
+
+# Public prometheus DNS
+resource "aws_route53_record" "prometheus_hostname" {
+ count = var.prometheus_hostname == "" ? 0 : 1
+
+ zone_id = var.aws_route53_zone_id
+ name = var.prometheus_hostname
+ type = "A"
+ ttl = 300
+ records = [aws_eip.ip_prometheus[count.index].public_ip]
+}
diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf
index 277eaf99b..a93a0852d 100644
--- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf
+++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf
@@ -93,3 +93,13 @@ resource "aws_eip" "ip_yugabyte" {
ExpectedTServerDNS = format("%s.tserver.%s", count.index, var.db_hostname_suffix)
}
}
+
+resource "aws_eip" "ip_prometheus" {
+ domain = "vpc"
+ count = var.prometheus_hostname == "" ? 0 : 1
+
+ tags = {
+ Name = format("%s-ip-prometheus", var.cluster_name)
+ ExpectedDNS = var.prometheus_hostname
+ }
+}
diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf
index 87f19ebea..15a4acd22 100644
--- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf
+++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf
@@ -18,6 +18,10 @@ output "ip_gateway" {
value = aws_eip.gateway[0].id
}
+output "ip_prometheus" {
+ value = length(aws_eip.ip_prometheus) > 0 ? aws_eip.ip_prometheus[0].id : ""
+}
+
output "crdb_nodes" {
value = [
for i in aws_eip.ip_crdb : {
@@ -82,6 +86,16 @@ output "gateway_address" {
}
}
+output "prometheus_address" {
+ value = length(aws_eip.ip_prometheus) > 0 ? {
+ expected_dns : aws_eip.ip_prometheus[0].tags.ExpectedDNS,
+ address : aws_eip.ip_prometheus[0].public_ip,
+ } : {
+ expected_dns : null,
+ address : null,
+ }
+}
+
output "workload_subnet" {
value = data.aws_subnet.main_subnet.id
}
diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf
index f398fa6e5..93bc5d38f 100644
--- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf
+++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf
@@ -107,6 +107,20 @@ variable "cluster_name" {
EOT
}
+variable "prometheus_hostname" {
+ type = string
+ default = ""
+ description = <<-EOT
+ Domain used to expose prometheus on an external endpoint.
+
+ Leave empty to disable exposition of prometheus publicly.
+
+ Example: `prometheus.dss.example.com`
+
+ EOT
+}
+
+
variable "kubernetes_version" {
type = string
description = <<-EOT
diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf
index 2e247483d..a5340345d 100644
--- a/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf
+++ b/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf
@@ -93,7 +93,18 @@ resource "local_file" "helm_chart_values" {
}
monitoring = {
- enabled = false
+ enabled = var.enable_monitoring
+ externalService = var.prometheus_hostname != "" ? {
+ enabled = var.enable_monitoring
+ ip = var.ip_prometheus
+ subnet = var.workload_subnet
+ hostname = var.prometheus_hostname
+ } : {
+ enabled = false
+ ip = null
+ subnet = null
+ hostname = null
+ }
},
prometheus = {
@@ -289,7 +300,18 @@ resource "local_file" "helm_chart_values" {
}
monitoring = {
- enabled = false
+ enabled = var.enable_monitoring
+ externalService = var.prometheus_hostname != "" ? {
+ enabled = var.enable_monitoring
+ ip = var.ip_prometheus
+ subnet = var.workload_subnet
+ hostname = var.prometheus_hostname
+ } : {
+ enabled = false
+ ip = null
+ subnet = null
+ hostname = null
+ }
},
prometheus = {
diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf
index faa8ec0b0..d3c255c5c 100644
--- a/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf
+++ b/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf
@@ -51,6 +51,8 @@ resource "local_file" "tanka_config_main" {
VAR_EVICT_RID_TTL = var.evict_rid_ttl
VAR_EVICT_RID_ENABLE_ISAS = var.evict_rid_isas
VAR_EVICT_RID_ENABLE_SUBSCRIPTIONS = var.evict_rid_subscriptions
+ VAR_PROMETHEUS_EXPOSE_EXTERNAL = var.prometheus_hostname != ""
+ VAR_PROMETHEUS_IP = var.ip_prometheus
})
filename = "${local.tanka_workspace_location}/main.jsonnet"
}
diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp
index cb9cd2015..fdd685a08 100644
--- a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp
+++ b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp
@@ -83,6 +83,8 @@ local metadata = metadataBase {
},
prometheus+: {
storageClass: '${VAR_STORAGE_CLASS}',
+ expose_external: ${VAR_PROMETHEUS_EXPOSE_EXTERNAL},
+ IP: '${VAR_PROMETHEUS_IP}',
},
image_pull_secret: '${VAR_DOCKER_IMAGE_PULL_SECRET}',
cloud_provider: '${VAR_CLOUD_PROVIDER}',
diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf
index 8ddb417b8..c4a8f544a 100644
--- a/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf
+++ b/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf
@@ -64,6 +64,20 @@ variable "cluster_name" {
EOT
}
+variable "prometheus_hostname" {
+ type = string
+ default = ""
+ description = <<-EOT
+ Domain used to expose prometheus on an external endpoint.
+
+ Leave empty to disable exposition of prometheus publicly.
+
+ Example: `prometheus.dss.example.com`
+
+ EOT
+}
+
+
variable "image" {
type = string
description = <<-EOT
@@ -456,3 +470,14 @@ variable "evict_rid_subscriptions" {
}
+variable "enable_monitoring" {
+ type = bool
+ default = false
+ description = <<-EOT
+ Set to true to enable monitoring stack with prometheus / grafana.
+
+ Example: `true`
+ EOT
+}
+
+
diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf
index d56ca25f1..a5233aa26 100644
--- a/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf
+++ b/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf
@@ -50,6 +50,11 @@ variable "ip_gateway" {
description = "IP of the gateway used by the DSS service"
}
+variable "ip_prometheus" {
+ type = string
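+ description = "Static IP reserved for the external prometheus load balancer: the address on Google, the Elastic IP allocation id on AWS. Empty when prometheus is not exposed."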
+}
+
variable "kubernetes_storage_class" {
type = string
description = <<-EOT
diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf
index a18e67721..e34b06a7d 100644
--- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf
+++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf
@@ -70,6 +70,17 @@ resource "google_compute_address" "ip_yugabyte" {
description = format("%s.%%s.%s", count.index, var.db_hostname_suffix)
}
+resource "google_compute_address" "ip_prometheus" {
+
+ count = var.prometheus_hostname == "" ? 0 : 1
+
+ name = format("%s-ip-prometheus", var.cluster_name)
+ region = local.region
+
+ # Current google terraform provider doesn't allow tags or labels. Description is used to preserve mapping between ips and hostnames.
+ description = var.prometheus_hostname
+}
+
locals {
kubectl_cluster_context_name = format("gke_%s_%s_%s", google_container_cluster.kubernetes_cluster.project, google_container_cluster.kubernetes_cluster.location, google_container_cluster.kubernetes_cluster.name)
}
diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf
index da4d28a1d..cb7598f5c 100644
--- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf
+++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf
@@ -44,3 +44,13 @@ resource "google_dns_record_set" "yugabyte_tserver" {
managed_zone = data.google_dns_managed_zone.default[0].name
rrdatas = [google_compute_address.ip_yugabyte[count.index].address]
}
+
+resource "google_dns_record_set" "prometheus_hostname" {
+ count = var.prometheus_hostname == "" || var.google_dns_managed_zone_name == "" ? 0 : 1
+ name = "${google_compute_address.ip_prometheus[0].description}." # description contains the expected hostname
+ type = "A"
+ ttl = 300
+
+ managed_zone = data.google_dns_managed_zone.default[0].name
+ rrdatas = [google_compute_address.ip_prometheus[0].address]
+}
diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf
index 028793f44..a1c0672d3 100644
--- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf
+++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf
@@ -67,3 +67,7 @@ output "yugabyte_tservers_nodes" {
}
]
}
+
+output "ip_prometheus" {
+ value = length(google_compute_address.ip_prometheus) > 0 ? google_compute_address.ip_prometheus[0].address : ""
+}
diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf
index 430517e8c..847da1f43 100644
--- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf
+++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf
@@ -96,6 +96,20 @@ variable "cluster_name" {
EOT
}
+variable "prometheus_hostname" {
+ type = string
+ default = ""
+ description = <<-EOT
+ Domain used to expose prometheus on an external endpoint.
+
+ Leave empty to disable exposition of prometheus publicly.
+
+ Example: `prometheus.dss.example.com`
+
+ EOT
+}
+
+
variable "kubernetes_version" {
type = string
description = <<-EOT
diff --git a/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md b/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md
index 96bc9806f..2187a914b 100644
--- a/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md
+++ b/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md
@@ -157,6 +157,11 @@ Use latest to use the latest schema version.
Use latest to use the latest schema version.
Example: 3.1.0
Default value: "latest"
+
+ enable_monitoring (bool) |
+ Set to true to enable monitoring stack with prometheus / grafana.
+Example: true
+ Default value: false |
enable_scd (bool) |
Set this boolean true to enable ASTM strategic conflict detection functionality
@@ -249,6 +254,12 @@ DSS instances) value is acceptable.
Currently, only single node or three nodes deployments are supported.
Example: 3
|
+
+ prometheus_hostname (string) |
+ Domain used to expose prometheus on an external endpoint.
+Leave empty to disable exposition of prometheus publicly.
+Example: prometheus.dss.example.com
+ Default value: "" |
should_init (bool) |
Set to false if joining an existing pool, true if creating the first DSS instance
diff --git a/deploy/infrastructure/modules/terraform-aws-dss/main.tf b/deploy/infrastructure/modules/terraform-aws-dss/main.tf
index 85581914c..907e20725 100644
--- a/deploy/infrastructure/modules/terraform-aws-dss/main.tf
+++ b/deploy/infrastructure/modules/terraform-aws-dss/main.tf
@@ -10,6 +10,7 @@ module "terraform-aws-kubernetes" {
aws_iam_permissions_boundary = var.aws_iam_permissions_boundary
node_count = var.node_count
kubernetes_version = var.kubernetes_version
+ prometheus_hostname = var.prometheus_hostname
source = "../../dependencies/terraform-aws-kubernetes"
}
@@ -56,6 +57,9 @@ module "terraform-commons-dss" {
evict_rid_ttl = var.evict_rid_ttl
evict_rid_isas = var.evict_rid_isas
evict_rid_subscriptions = var.evict_rid_subscriptions
+ enable_monitoring = var.enable_monitoring
+ prometheus_hostname = var.prometheus_hostname
+ ip_prometheus = module.terraform-aws-kubernetes.ip_prometheus
source = "../../dependencies/terraform-commons-dss"
}
diff --git a/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars b/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars
index 4ed5bce5f..76b30a8f5 100644
--- a/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars
+++ b/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars
@@ -39,3 +39,7 @@ yugabyte_region = "aws-uss-1"
yugabyte_zone = "aws-uss-1"
yugabyte_light_resources = false
yugabyte_external_nodes = []
+
+# Monitoring
+enable_monitoring = false
+# prometheus_hostname = "prometheus.dss.interuss.example.com"
diff --git a/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf b/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf
index aa3e8e85c..50a59a3ad 100644
--- a/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf
+++ b/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf
@@ -107,6 +107,20 @@ variable "cluster_name" {
EOT
}
+variable "prometheus_hostname" {
+ type = string
+ default = ""
+ description = <<-EOT
+ Domain used to expose prometheus on an external endpoint.
+
+ Leave empty to disable exposition of prometheus publicly.
+
+ Example: `prometheus.dss.example.com`
+
+ EOT
+}
+
+
variable "kubernetes_version" {
type = string
description = <<-EOT
@@ -555,3 +569,14 @@ variable "evict_rid_subscriptions" {
}
+variable "enable_monitoring" {
+ type = bool
+ default = false
+ description = <<-EOT
+ Set to true to enable monitoring stack with prometheus / grafana.
+
+ Example: `true`
+ EOT
+}
+
+
diff --git a/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md b/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md
index cc3c44c1a..2a60c1491 100644
--- a/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md
+++ b/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md
@@ -126,6 +126,11 @@ Use latest to use the latest schema version.
Use latest to use the latest schema version.
Example: 3.1.0
Default value: "latest" |
+
+ enable_monitoring (bool) |
+ Set to true to enable monitoring stack with prometheus / grafana.
+Example: true
+ Default value: false |
enable_scd (bool) |
Set this boolean true to enable ASTM strategic conflict detection functionality
@@ -245,6 +250,12 @@ DSS instances) value is acceptable.
Currently, only single node or three nodes deployments are supported.
Example: 3
|
+
+ prometheus_hostname (string) |
+ Domain used to expose prometheus on an external endpoint.
+Leave empty to disable exposition of prometheus publicly.
+Example: prometheus.dss.example.com
+ Default value: "" |
should_init (bool) |
Set to false if joining an existing pool, true if creating the first DSS instance
diff --git a/deploy/infrastructure/modules/terraform-google-dss/main.tf b/deploy/infrastructure/modules/terraform-google-dss/main.tf
index 88969912c..9888544f3 100644
--- a/deploy/infrastructure/modules/terraform-google-dss/main.tf
+++ b/deploy/infrastructure/modules/terraform-google-dss/main.tf
@@ -10,6 +10,7 @@ module "terraform-google-kubernetes" {
google_machine_type = var.google_machine_type
node_count = var.node_count
kubernetes_version = var.kubernetes_version
+ prometheus_hostname = var.prometheus_hostname
source = "../../dependencies/terraform-google-kubernetes"
}
@@ -55,6 +56,9 @@ module "terraform-commons-dss" {
evict_rid_ttl = var.evict_rid_ttl
evict_rid_isas = var.evict_rid_isas
evict_rid_subscriptions = var.evict_rid_subscriptions
+ enable_monitoring = var.enable_monitoring
+ prometheus_hostname = var.prometheus_hostname
+ ip_prometheus = module.terraform-google-kubernetes.ip_prometheus
source = "../../dependencies/terraform-commons-dss"
}
diff --git a/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars b/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars
index f4bed5658..6a0944d54 100644
--- a/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars
+++ b/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars
@@ -40,3 +40,7 @@ yugabyte_region = "gcp-uss-1"
yugabyte_zone = "gcp-uss-1"
yugabyte_light_resources = false
yugabyte_external_nodes = []
+
+# Monitoring
+enable_monitoring = false
+# prometheus_hostname = "prometheus.dss.interuss.example.com"
diff --git a/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf b/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf
index 13d033401..c43e8f5ee 100644
--- a/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf
+++ b/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf
@@ -96,6 +96,20 @@ variable "cluster_name" {
EOT
}
+variable "prometheus_hostname" {
+ type = string
+ default = ""
+ description = <<-EOT
+ Domain used to expose prometheus on an external endpoint.
+
+ Leave empty to disable exposition of prometheus publicly.
+
+ Example: `prometheus.dss.example.com`
+
+ EOT
+}
+
+
variable "kubernetes_version" {
type = string
description = <<-EOT
@@ -544,3 +558,14 @@ variable "evict_rid_subscriptions" {
}
+variable "enable_monitoring" {
+ type = bool
+ default = false
+ description = <<-EOT
+ Set to true to enable monitoring stack with prometheus / grafana.
+
+ Example: `true`
+ EOT
+}
+
+
diff --git a/deploy/infrastructure/utils/definitions/enable_monitoring.tf b/deploy/infrastructure/utils/definitions/enable_monitoring.tf
new file mode 100644
index 000000000..de1c503d0
--- /dev/null
+++ b/deploy/infrastructure/utils/definitions/enable_monitoring.tf
@@ -0,0 +1,9 @@
+variable "enable_monitoring" {
+ type = bool
+ default = false
+ description = <<-EOT
+ Set to true to enable monitoring stack with prometheus / grafana.
+
+ Example: `true`
+ EOT
+}
diff --git a/deploy/infrastructure/utils/definitions/prometheus_hostname.tf b/deploy/infrastructure/utils/definitions/prometheus_hostname.tf
new file mode 100644
index 000000000..ec0179da5
--- /dev/null
+++ b/deploy/infrastructure/utils/definitions/prometheus_hostname.tf
@@ -0,0 +1,12 @@
+variable "prometheus_hostname" {
+ type = string
+ default = ""
+ description = <<-EOT
+ Domain used to expose prometheus on an external endpoint.
+
+ Leave empty to disable exposition of prometheus publicly.
+
+ Example: `prometheus.dss.example.com`
+
+ EOT
+}
diff --git a/deploy/infrastructure/utils/variables.py b/deploy/infrastructure/utils/variables.py
index 751ee6898..f63971f55 100755
--- a/deploy/infrastructure/utils/variables.py
+++ b/deploy/infrastructure/utils/variables.py
@@ -23,7 +23,14 @@
# Variables per project
# For all */terraform-*
-GLOBAL_VARIABLES = ["app_hostname", "db_hostname_suffix", "datastore_type", "node_count", "cluster_name"]
+GLOBAL_VARIABLES = [
+ "app_hostname",
+ "db_hostname_suffix",
+ "datastore_type",
+ "node_count",
+ "cluster_name",
+ "prometheus_hostname",
+]
# dependencies/terraform-commons-dss
COMMONS_DSS_VARIABLES = GLOBAL_VARIABLES + [
@@ -55,6 +62,7 @@
"evict_rid_ttl",
"evict_rid_isas",
"evict_rid_subscriptions",
+ "enable_monitoring",
]
# dependencies/terraform-*-kubernetes
@@ -91,11 +99,13 @@
# modules/terraform-aws-dss
AWS_MODULE_VARIABLES = (
- AWS_KUBERNETES_VARIABLES + [
+ AWS_KUBERNETES_VARIABLES
+ + [
"aws_kubernetes_storage_class",
"crdb_hostname_suffix",
"crdb_locality",
- ] + COMMONS_DSS_VARIABLES
+ ]
+ + COMMONS_DSS_VARIABLES
)
PROJECT_VARIABLES = {
@@ -246,7 +256,9 @@ def simplify_type(value_type):
description, value_type, default_value = parse_definition(v, definitions[v])
formatted_value_type = f"{simplify_type(value_type)}"
formatted_default_value = (
- f" Default value: {default_value}" if default_value is not None else ""
+ f" Default value: {default_value}"
+ if default_value is not None
+ else ""
)
formatted_description = marko.convert(description)
content += f"""
@@ -262,7 +274,7 @@ def simplify_type(value_type):
""".strip()
if has_internal_vars:
- content += f"## Internal variables\n\n"
+ content += "## Internal variables\n\n"
content += f"This module requires additional variables, see [{INTERNAL_VARIABLES_FILENAME}](./{INTERNAL_VARIABLES_FILENAME}) for details"
return content
diff --git a/deploy/operations/ci/aws-1/variables.gen.tf b/deploy/operations/ci/aws-1/variables.gen.tf
index aa3e8e85c..50a59a3ad 100644
--- a/deploy/operations/ci/aws-1/variables.gen.tf
+++ b/deploy/operations/ci/aws-1/variables.gen.tf
@@ -107,6 +107,20 @@ variable "cluster_name" {
EOT
}
+variable "prometheus_hostname" {
+ type = string
+ default = ""
+ description = <<-EOT
+ Domain used to expose prometheus on an external endpoint.
+
+ Leave empty to disable exposition of prometheus publicly.
+
+ Example: `prometheus.dss.example.com`
+
+ EOT
+}
+
+
variable "kubernetes_version" {
type = string
description = <<-EOT
@@ -555,3 +569,14 @@ variable "evict_rid_subscriptions" {
}
+variable "enable_monitoring" {
+ type = bool
+ default = false
+ description = <<-EOT
+ Set to true to enable monitoring stack with prometheus / grafana.
+
+ Example: `true`
+ EOT
+}
+
+
|