From 1b0aa6170fd6c20cdf78de033d5bfaea60ba0792 Mon Sep 17 00:00:00 2001 From: Maximilien Cuony Date: Tue, 20 Jan 2026 11:20:02 +0100 Subject: [PATCH] [monitoring] Use certificate for grafana and prometheus --- deploy/services/helm-charts/dss/values.yaml | 39 +++++++++++++++++---- deploy/services/tanka/grafana.libsonnet | 23 +++++++++++- deploy/services/tanka/prometheus.libsonnet | 36 ++++++++++++------- docs/operations/monitoring.md | 28 +++++++++++++++ 4 files changed, 106 insertions(+), 20 deletions(-) diff --git a/deploy/services/helm-charts/dss/values.yaml b/deploy/services/helm-charts/dss/values.yaml index 997c76e87..6b3eaf9a7 100644 --- a/deploy/services/helm-charts/dss/values.yaml +++ b/deploy/services/helm-charts/dss/values.yaml @@ -85,12 +85,17 @@ prometheus: evaluation_interval: 5s podAnnotations: - serverFilesVersion: "1" # Since the prometheus helm chart does not detect changes in server files, this value is used to force the redeployment following the update of the serverFiles defined below. + serverFilesVersion: "2" # Since the prometheus helm chart does not detect changes in server files, this value is used to force the redeployment following the update of the serverFiles defined below. - service: - annotations: - 'prometheus.io/scrape': 'true' - 'prometheus.io/port': '9090' + extraFlags: + - "web.config.file=/etc/config/web-config.yml" + + extraSecretMounts: + - name: prometheus-certs + secretName: 'monitoring.prometheus.certs' + mountPath: /certs/ + + tcpSocketProbeEnabled: true prometheus-pushgateway: enabled: false @@ -104,7 +109,14 @@ prometheus: alertmanager: enabled: false - serverFiles: # Caution: Since the prometheus helm chart does not detect changes in server files, update the `prometheus.server.podAnnotations.serverFilesVersion` to force the redeployment of prometheus. 
+ serverFiles: # Caution: Since the prometheus helm chart does not detect changes in server files, update the `prometheus.server.podAnnotations.serverFilesVersion` to force the redeployment of prometheus. + web-config.yml: + tls_server_config: + cert_file: '/certs/node.crt' + key_file: '/certs/node.key' + client_auth_type: 'RequireAndVerifyClientCert' + client_ca_file: '/certs/ca.crt' + prometheus.yml: scrape_configs: - job_name: K8s-Endpoints @@ -307,9 +319,17 @@ grafana: type: prometheus access: proxy orgId: 1 - url: http://dss-prometheus-server:80 + url: https://dss-prometheus-server:80 version: 1 editable: true + jsonData: + tlsAuth: true + tlsAuthWithCACert: true + serverName: 'prometheus' + secureJsonData: + tlsCACert: '$__file{/certs/ca.crt}' + tlsClientCert: '$__file{/certs/client.grafana.crt}' + tlsClientKey: '$__file{/certs/client.grafana.key}' dashboardProviders: dashboardproviders.yaml: @@ -333,3 +353,8 @@ grafana: deploymentStrategy: type: Recreate + + extraSecretMounts: + - name: grafana-certs + secretName: 'monitoring.grafana.certs' + mountPath: /certs/ diff --git a/deploy/services/tanka/grafana.libsonnet b/deploy/services/tanka/grafana.libsonnet index 13e62edf9..f7a43f08b 100644 --- a/deploy/services/tanka/grafana.libsonnet +++ b/deploy/services/tanka/grafana.libsonnet @@ -27,8 +27,18 @@ local datasourcePrometheus(metadata) = { name: 'prometheus', orgId: 1, type: 'prometheus', - url: 'http://prometheus-service.' + metadata.namespace + '.svc:9090', + url: 'https://prometheus-service.' 
+ metadata.namespace + '.svc:9090', version: 1, + jsonData: { + tlsAuth: true, + tlsAuthWithCACert: true, + serverName: 'prometheus', + }, + secureJsonData: { + tlsCACert: '$__file{/certs/ca.crt}', + tlsClientCert: '$__file{/certs/client.grafana.crt}', + tlsClientKey: '$__file{/certs/client.grafana.key}', + }, }, ], }; @@ -127,6 +137,10 @@ local notifierConfig(metadata) = { name: 'grafana-notifier-provisioning', readOnly: false, }, + { + mountPath: '/certs/', + name: 'grafana-certs', + }, ] + dashboard.all(metadata).mount, }, ], @@ -156,6 +170,13 @@ local notifierConfig(metadata) = { name: 'grafana-notifier-provisioning', }, }, + { + name: 'grafana-certs', + secret: { + secretName: 'monitoring.grafana.certs', + defaultMode: 420, + }, + }, ] + dashboard.all(metadata).volumes, }, }, diff --git a/deploy/services/tanka/prometheus.libsonnet b/deploy/services/tanka/prometheus.libsonnet index 450ff5a7a..5cce0831b 100644 --- a/deploy/services/tanka/prometheus.libsonnet +++ b/deploy/services/tanka/prometheus.libsonnet @@ -20,6 +20,15 @@ local PrometheusConfig(metadata) = { scrape_configs: k8sEndpoints.scrape_configs, }; +local PrometheusWebConfig(metadata) = { + tls_server_config: { + cert_file: '/certs/node.crt', + key_file: '/certs/node.key', + client_auth_type: 'RequireAndVerifyClientCert', + client_ca_file: '/certs/ca.crt' + } +}; + local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-external') { app:: 'prometheus', port:: 9090, @@ -93,6 +102,7 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e configMap: base.ConfigMap(metadata, 'prometheus-conf') { data: { 'prometheus.yml': std.manifestYamlDoc(PrometheusConfig(metadata)), + 'web-config.yml': std.manifestYamlDoc(PrometheusWebConfig(metadata)), 'aggregation.rules.yml': std.manifestYamlDoc(crdbAggregation), 'custom.rules.yml': std.manifestYamlDoc({ groups: [ @@ -112,6 +122,7 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e 
metadata+: { annotations+: { "checksum/config": std.native('sha256')(std.manifestJson(PrometheusConfig(metadata))), + "checksum/webconfig": std.native('sha256')(std.manifestJson(PrometheusWebConfig(metadata))), "checksum/k8sEndpoints": std.native('sha256')(std.manifestJson(k8sEndpoints)), "checksum/crdbAggregation": std.native('sha256')(std.manifestJson(crdbAggregation)), }, @@ -131,6 +142,13 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e claimName: 'prometheus-datadir', }, }, + { + name: 'prometheus-certs', + secret: { + secretName: 'monitoring.prometheus.certs', + defaultMode: 420, + }, + }, ], initContainers: [ { @@ -156,6 +174,7 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e image: metadata.prometheus.image, args: [ '--config.file=/etc/prometheus/prometheus.yml', + '--web.config.file=/etc/prometheus/web-config.yml', '--storage.tsdb.path=/data/prometheus/', '--storage.tsdb.retention.time=' + metadata.prometheus.retention, // following thanos recommendation @@ -175,25 +194,19 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e name: 'prometheus-datadir', mountPath: '/data/prometheus/', }, + { + name: 'prometheus-certs', + mountPath: '/certs/', + }, ], livenessProbe: { - httpGet: { - path: '/-/healthy', + tcpSocket: { port: 9090 }, initialDelaySeconds: 50, periodSeconds: 6, failureThreshold: 200 }, - readinessProbe: { - httpGet: { - path: '/-/ready', - port: 9090 - }, - initialDelaySeconds: 30, - periodSeconds: 6, - failureThreshold: 200, - }, }, ], }, @@ -222,7 +235,6 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e internalService: base.Service(metadata, 'prometheus-service') { app:: 'prometheus', port:: 9090, - enable_monitoring:: true, }, }, } diff --git a/docs/operations/monitoring.md b/docs/operations/monitoring.md index be2302e5d..071d2729d 100644 --- a/docs/operations/monitoring.md +++ b/docs/operations/monitoring.md 
@@ -29,6 +29,34 @@ kubectl get secrets/dss-grafana -o jsonpath="{.data.admin-password}" | base64 -d Click the magnifying glass on the left side to select a dashboard to view. +### Prometheus access + +Prometheus access is protected by a client certificate. If you need to access the web interface, you will need to import a valid client certificate into your browser. + +!!! info + For day-to-day usage, you don't need to access Prometheus directly; use Grafana instead. Direct access is only useful for debugging. + +To build a PKCS#12 file from a valid client certificate (choose a random export password when prompted; you will need it again when importing the file): + +=== "Yugabyte" + ``` + openssl pkcs12 -export -inkey deploy/operations/certificates-management/workspace/demo/clients/client.grafana.key -in deploy/operations/certificates-management/workspace/demo/clients/client.grafana.crt -out /tmp/cert_key.p12 + ``` + +=== "CockroachDB" + ``` + openssl pkcs12 -export -inkey build/workspace/demo/client_certs_dir/client.grafana.key -in build/workspace/demo/client_certs_dir/client.grafana.crt -out /tmp/cert_key.p12 + ``` + +--- + +Then import this file as a client certificate in your browser. + +* Firefox: Preferences > Privacy & Security > View Certificates > Your Certificates > Import +* Chrome: Privacy and security > Security > Manage Certificates > Import + +The next time you access the Prometheus web interface, select the certificate you just imported. + ## Prometheus Federation (Multi Cluster Monitoring) The DSS can use [Prometheus](https://prometheus.io/docs/introduction/overview/) to