Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions deploy/services/helm-charts/dss/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,17 @@ prometheus:
evaluation_interval: 5s

podAnnotations:
serverFilesVersion: "1" # Since the prometheus helm chart does not detect changes in server files, this value is used to force the redeployment following the update of the serverFiles defined below.
serverFilesVersion: "2" # Since the prometheus helm chart does not detect changes in server files, this value is used to force the redeployment following the update of the serverFiles defined below.

service:
annotations:
'prometheus.io/scrape': 'true'
'prometheus.io/port': '9090'
extraFlags:
- "web.config.file=/etc/config/web-config.yml"

extraSecretMounts:
- name: prometheus-certs
secretName: 'monitoring.prometheus.certs'
mountPath: /certs/

tcpSocketProbeEnabled: true

prometheus-pushgateway:
enabled: false
Expand All @@ -104,7 +109,14 @@ prometheus:
alertmanager:
enabled: false

serverFiles: # Caution: Since the prometheus helm chart does not detect changes in server files, update the `prometheus.server.podAnnotations.serverFilesVersion` to force the redeployment of prometheus.
serverFiles: # Caution: Since the prometheus helm chart does not detect changes in server files, update the `prometheus.server.podAnnotations.serverFilesVersion` to force the redeployment of prometheus.
web-config.yml:
tls_server_config:
cert_file: '/certs/node.crt'
key_file: '/certs/node.key'
client_auth_type: 'RequireAndVerifyClientCert'
client_ca_file: '/certs/ca.crt'

prometheus.yml:
scrape_configs:
- job_name: K8s-Endpoints
Expand Down Expand Up @@ -307,9 +319,17 @@ grafana:
type: prometheus
access: proxy
orgId: 1
url: http://dss-prometheus-server:80
url: https://dss-prometheus-server:80
version: 1
editable: true
jsonData:
tlsAuth: true
tlsAuthWithCACert: true
serverName: 'prometheus'
secureJsonData:
tlsCACert: '$__file{/certs/ca.crt}'
tlsClientCert: '$__file{/certs/client.grafana.crt}'
tlsClientKey: '$__file{/certs/client.grafana.key}'

dashboardProviders:
dashboardproviders.yaml:
Expand All @@ -333,3 +353,8 @@ grafana:

deploymentStrategy:
type: Recreate

extraSecretMounts:
- name: grafana-certs
secretName: 'monitoring.grafana.certs'
mountPath: /certs/
23 changes: 22 additions & 1 deletion deploy/services/tanka/grafana.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,18 @@ local datasourcePrometheus(metadata) = {
name: 'prometheus',
orgId: 1,
type: 'prometheus',
url: 'http://prometheus-service.' + metadata.namespace + '.svc:9090',
url: 'https://prometheus-service.' + metadata.namespace + '.svc:9090',
version: 1,
jsonData: {
tlsAuth: true,
tlsAuthWithCACert: true,
serverName: 'prometheus',
},
secureJsonData: {
tlsCACert: '$__file{/certs/ca.crt}',
tlsClientCert: '$__file{/certs/client.grafana.crt}',
tlsClientKey: '$__file{/certs/client.grafana.key}',
},
},
],
};
Expand Down Expand Up @@ -127,6 +137,10 @@ local notifierConfig(metadata) = {
name: 'grafana-notifier-provisioning',
readOnly: false,
},
{
mountPath: '/certs/',
name: 'grafana-certs',
},
] + dashboard.all(metadata).mount,
},
],
Expand Down Expand Up @@ -156,6 +170,13 @@ local notifierConfig(metadata) = {
name: 'grafana-notifier-provisioning',
},
},
{
name: 'grafana-certs',
secret: {
secretName: 'monitoring.grafana.certs',
defaultMode: 420,
},
},
] + dashboard.all(metadata).volumes,
},
},
Expand Down
36 changes: 24 additions & 12 deletions deploy/services/tanka/prometheus.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ local PrometheusConfig(metadata) = {
scrape_configs: k8sEndpoints.scrape_configs,
};

// Builds the Prometheus web configuration object (rendered elsewhere in this
// file as 'web-config.yml'). It enables TLS using the certificate and key under
// /certs/ and sets client_auth_type to 'RequireAndVerifyClientCert', with
// /certs/ca.crt as the client CA file.
// `metadata` is accepted for signature parity with the other config builders
// but is not read in this body.
local PrometheusWebConfig(metadata) = {
tls_server_config: {
cert_file: '/certs/node.crt',
key_file: '/certs/node.key',
client_auth_type: 'RequireAndVerifyClientCert',
client_ca_file: '/certs/ca.crt'
}
};

local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-external') {
app:: 'prometheus',
port:: 9090,
Expand Down Expand Up @@ -93,6 +102,7 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e
configMap: base.ConfigMap(metadata, 'prometheus-conf') {
data: {
'prometheus.yml': std.manifestYamlDoc(PrometheusConfig(metadata)),
'web-config.yml': std.manifestYamlDoc(PrometheusWebConfig(metadata)),
'aggregation.rules.yml': std.manifestYamlDoc(crdbAggregation),
'custom.rules.yml': std.manifestYamlDoc({
groups: [
Expand All @@ -112,6 +122,7 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e
metadata+: {
annotations+: {
"checksum/config": std.native('sha256')(std.manifestJson(PrometheusConfig(metadata))),
"checksum/webconfig": std.native('sha256')(std.manifestJson(PrometheusWebConfig(metadata))),
"checksum/k8sEndpoints": std.native('sha256')(std.manifestJson(k8sEndpoints)),
"checksum/crdbAggregation": std.native('sha256')(std.manifestJson(crdbAggregation)),
},
Expand All @@ -131,6 +142,13 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e
claimName: 'prometheus-datadir',
},
},
{
name: 'prometheus-certs',
secret: {
secretName: 'monitoring.prometheus.certs',
defaultMode: 420,
},
},
],
initContainers: [
{
Expand All @@ -156,6 +174,7 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e
image: metadata.prometheus.image,
args: [
'--config.file=/etc/prometheus/prometheus.yml',
'--web.config.file=/etc/prometheus/web-config.yml',
'--storage.tsdb.path=/data/prometheus/',
'--storage.tsdb.retention.time=' + metadata.prometheus.retention,
// following thanos recommendation
Expand All @@ -175,25 +194,19 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e
name: 'prometheus-datadir',
mountPath: '/data/prometheus/',
},
{
name: 'prometheus-certs',
mountPath: '/certs/',
},
],
livenessProbe: {
httpGet: {
path: '/-/healthy',
tcpSocket: {
port: 9090
},
initialDelaySeconds: 50,
periodSeconds: 6,
failureThreshold: 200
},
readinessProbe: {
httpGet: {
path: '/-/ready',
port: 9090
},
initialDelaySeconds: 30,
periodSeconds: 6,
failureThreshold: 200,
},
},
],
},
Expand Down Expand Up @@ -222,7 +235,6 @@ local PrometheusExternalService(metadata) = base.Service(metadata, 'prometheus-e
internalService: base.Service(metadata, 'prometheus-service') {
app:: 'prometheus',
port:: 9090,
enable_monitoring:: true,
},
},
}
28 changes: 28 additions & 0 deletions docs/operations/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,34 @@ kubectl get secrets/dss-grafana -o jsonpath="{.data.admin-password}" | base64 -d

Click the magnifying glass on the left side to select a dashboard to view.

### Prometheus access

Prometheus access is protected by a client certificate. If you need to access the web interface, you will need to import a valid client certificate in your browser.

!!! info
For day-to-day usage, you don't need to access Prometheus directly; use Grafana instead. Direct access is only useful for debugging.

To build a PKCS#12 file from a valid client certificate (openssl will prompt for an export password; use a random one):

=== "Yugabyte"
```
openssl pkcs12 -export -inkey deploy/operations/certificates-management/workspace/demo/clients/client.grafana.key -in deploy/operations/certificates-management/workspace/demo/clients/client.grafana.crt -out /tmp/cert_key.p12
```

=== "CockroachDB"
```
openssl pkcs12 -export -inkey build/workspace/demo/client_certs_dir/client.grafana.key -in build/workspace/demo/client_certs_dir/client.grafana.crt -out /tmp/cert_key.p12
```

---

Then import this file as a client certificate in your browser.

* Firefox: Preferences > Privacy & Security > View Certificates > Your Certificates > Import
* Chrome: Privacy and security > Security > Manage Certificates > Import

Next time you access the interface, select the certificate you just imported.

## Prometheus Federation (Multi Cluster Monitoring)

The DSS can use [Prometheus](https://prometheus.io/docs/introduction/overview/) to
Expand Down
Loading