Skip to content

Commit b251f50

Browse files
committed
CLOUDP-351614: Fix monitoring failure after disabling TLS on AppDB
When TLS is disabled on AppDB, clear stale TLS params from monitoring config's additionalParams to prevent the monitoring agent from trying to use certificate files that no longer exist. Added addTLSParams/clearTLSParams functions that use shared constants to ensure add and remove operations stay in sync.
1 parent 2bf57af commit b251f50

12 files changed

+228
-39
lines changed

.evergreen.yml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,21 @@ buildvariants:
14391439
tags: [ "pr_patch", "staging", "e2e_test_suite", "static" ]
14401440
run_on:
14411441
- ubuntu2404-large
1442-
<<: *base_om8_dependency
1442+
depends_on:
1443+
- name: build_om_images
1444+
variant: build_om80_images
1445+
- name: build_operator_ubi
1446+
variant: init_test_run
1447+
- name: build_init_database_image_ubi
1448+
variant: init_test_run
1449+
- name: build_test_image
1450+
variant: init_test_run
1451+
- name: build_init_appdb_images_ubi
1452+
variant: init_test_run
1453+
- name: build_init_om_images_ubi
1454+
variant: init_test_run
1455+
- name: publish_helm_chart
1456+
variant: init_test_run
14431457
tasks:
14441458
- name: e2e_static_ops_manager_kind_only_task_group
14451459
- name: e2e_static_ops_manager_kind_6_0_only_task_group
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
kind: fix
3+
date: 2025-12-16
4+
---
5+
6+
* Fixed an issue where monitoring agents would fail after disabling TLS on a MongoDB deployment.

controllers/om/deployment.go

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -251,15 +251,16 @@ func (d Deployment) MergeShardedCluster(opts DeploymentShardedClusterMergeOption
251251
return shardsScheduledForRemoval, nil
252252
}
253253

254-
// AddMonitoringAndBackup adds monitoring and backup agents to each process
255-
// The automation agent will update the agents versions to the latest version automatically
254+
// ConfigureMonitoringAndBackup configures monitoring and backup agents for each process.
255+
// This is called on every reconcile to ensure the monitoring/backup config matches the desired state.
256+
// The automation agent will update the agents versions to the latest version automatically.
256257
// Note, that these two are deliberately combined as all clients (standalone, rs etc.) need both backup and monitoring
257-
// together
258-
func (d Deployment) AddMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
258+
// together.
259+
func (d Deployment) ConfigureMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
259260
if len(d.getProcesses()) == 0 {
260261
return
261262
}
262-
d.AddMonitoring(log, tls, caFilepath)
263+
d.ConfigureMonitoring(log, tls, caFilepath)
263264
d.addBackup(log)
264265
}
265266

@@ -277,8 +278,9 @@ func (d Deployment) GetReplicaSetByName(name string) ReplicaSet {
277278
return nil
278279
}
279280

280-
// AddMonitoring adds monitoring agents for all processes in the deployment
281-
func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
281+
// ConfigureMonitoring configures monitoring agents for all processes in the deployment.
282+
// This is called on every reconcile to ensure the monitoring config matches the desired state.
283+
func (d Deployment) ConfigureMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
282284
if len(d.getProcesses()) == 0 {
283285
return
284286
}
@@ -306,23 +308,61 @@ func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath s
306308
monitoringVersion["hostname"] = p.HostName()
307309

308310
if tls {
309-
additionalParams := map[string]string{
310-
"useSslForAllConnections": "true",
311-
"sslTrustedServerCertificates": caFilePath,
311+
pemKeyFile := ""
312+
if pem := p.EnsureTLSConfig()["PEMKeyFile"]; pem != nil {
313+
pemKeyFile = pem.(string)
312314
}
313-
314-
pemKeyFile := p.EnsureTLSConfig()["PEMKeyFile"]
315-
if pemKeyFile != nil {
316-
additionalParams["sslClientCertificate"] = pemKeyFile.(string)
317-
}
318-
319-
monitoringVersion["additionalParams"] = additionalParams
315+
params := map[string]string{}
316+
addTLSParams(params, caFilePath, pemKeyFile)
317+
monitoringVersion["additionalParams"] = params
318+
} else {
319+
// Clear TLS-specific params when TLS is disabled to prevent monitoring from
320+
// trying to use certificate files that no longer exist.
321+
// We only clear TLS fields, preserving any other additionalParams that may be set.
322+
clearTLSParamsFromMonitoringVersion(monitoringVersion)
320323
}
321324

322325
}
323326
d.setMonitoringVersions(monitoringVersions)
324327
}
325328

329+
// TLS param keys for monitoring additionalParams.
330+
const (
331+
tlsParamUseSsl = "useSslForAllConnections"
332+
tlsParamTrustedCert = "sslTrustedServerCertificates"
333+
tlsParamClientCert = "sslClientCertificate"
334+
)
335+
336+
func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) {
337+
params[tlsParamUseSsl] = "true"
338+
params[tlsParamTrustedCert] = caFilePath
339+
if pemKeyFile != "" {
340+
params[tlsParamClientCert] = pemKeyFile
341+
}
342+
}
343+
344+
func clearTLSParams(params map[string]string) {
345+
delete(params, tlsParamUseSsl)
346+
delete(params, tlsParamTrustedCert)
347+
delete(params, tlsParamClientCert)
348+
}
349+
350+
// clearTLSParamsFromMonitoringVersion removes TLS-specific fields from the monitoring
351+
// version's additionalParams. If additionalParams becomes empty after removing TLS fields,
352+
// the entire map is removed.
353+
func clearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) {
354+
additionalParams, ok := monitoringVersion["additionalParams"].(map[string]string)
355+
if !ok {
356+
return
357+
}
358+
359+
clearTLSParams(additionalParams)
360+
361+
if len(additionalParams) == 0 {
362+
delete(monitoringVersion, "additionalParams")
363+
}
364+
}
365+
326366
// RemoveMonitoringAndBackup removes both monitoring and backup agent configurations. This must be called when the
327367
// Mongodb resource is being removed, otherwise UI will show non-existing agents in the "servers" tab
328368
func (d Deployment) RemoveMonitoringAndBackup(names []string, log *zap.SugaredLogger) {

controllers/om/deployment/testing_utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func CreateFromReplicaSet(mongoDBImage string, forceEnterprise bool, rs *mdb.Mon
3737
lastConfig.ToMap(),
3838
zap.S(),
3939
)
40-
d.AddMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
40+
d.ConfigureMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
4141
d.ConfigureTLS(rs.Spec.GetSecurity(), util.CAFilePathInContainer)
4242
return d
4343
}

controllers/om/deployment_test.go

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -489,12 +489,12 @@ func TestConfiguringTlsProcessFromOpsManager(t *testing.T) {
489489
}
490490
}
491491

492-
func TestAddMonitoring(t *testing.T) {
492+
func TestConfigureMonitoring(t *testing.T) {
493493
d := NewDeployment()
494494

495495
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
496496
d.MergeReplicaSet(rs0, nil, nil, zap.S())
497-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
497+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
498498

499499
expectedMonitoringVersions := []interface{}{
500500
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
@@ -504,16 +504,16 @@ func TestAddMonitoring(t *testing.T) {
504504
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
505505

506506
// adding again - nothing changes
507-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
507+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
508508
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
509509
}
510510

511-
func TestAddMonitoringTls(t *testing.T) {
511+
func TestConfigureMonitoringTls(t *testing.T) {
512512
d := NewDeployment()
513513

514514
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
515515
d.MergeReplicaSet(rs0, nil, nil, zap.S())
516-
d.AddMonitoring(zap.S(), true, util.CAFilePathInContainer)
516+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
517517

518518
expectedAdditionalParams := map[string]string{
519519
"useSslForAllConnections": "true",
@@ -528,10 +528,39 @@ func TestAddMonitoringTls(t *testing.T) {
528528
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
529529

530530
// adding again - nothing changes
531-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
531+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
532532
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
533533
}
534534

535+
func TestConfigureMonitoringTLSDisable(t *testing.T) {
536+
d := NewDeployment()
537+
538+
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
539+
d.MergeReplicaSet(rs0, nil, nil, zap.S())
540+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
541+
542+
// verify TLS is present in additionalParams
543+
expectedAdditionalParams := map[string]string{
544+
"useSslForAllConnections": "true",
545+
"sslTrustedServerCertificates": util.CAFilePathInContainer,
546+
}
547+
expectedMonitoringVersionsWithTls := []interface{}{
548+
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
549+
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
550+
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
551+
}
552+
assert.Equal(t, expectedMonitoringVersionsWithTls, d.getMonitoringVersions())
553+
554+
// disabling TLS should clear additionalParams (CLOUDP-351614)
555+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
556+
expectedMonitoringVersionsWithoutTls := []interface{}{
557+
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
558+
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion},
559+
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion},
560+
}
561+
assert.Equal(t, expectedMonitoringVersionsWithoutTls, d.getMonitoringVersions())
562+
}
563+
535564
func TestAddBackup(t *testing.T) {
536565
d := NewDeployment()
537566

controllers/operator/appdbreplicaset_controller.go

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,9 +1430,17 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14301430
monitoringVersions := ac.MonitoringVersions
14311431
for _, p := range ac.Processes {
14321432
found := false
1433-
for _, m := range monitoringVersions {
1433+
for i, m := range monitoringVersions {
14341434
if m.Hostname == p.HostName {
14351435
found = true
1436+
if !tls {
1437+
// Clear TLS-specific params when TLS is disabled to prevent monitoring from
1438+
// trying to use certificate files that no longer exist.
1439+
clearTLSParams(monitoringVersions[i].AdditionalParams)
1440+
if len(monitoringVersions[i].AdditionalParams) == 0 {
1441+
monitoringVersions[i].AdditionalParams = nil
1442+
}
1443+
}
14361444
break
14371445
}
14381446
}
@@ -1442,15 +1450,12 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14421450
Name: om.MonitoringAgentDefaultVersion,
14431451
}
14441452
if tls {
1445-
additionalParams := map[string]string{
1446-
"useSslForAllConnections": "true",
1447-
"sslTrustedServerCertificates": appdbCAFilePath,
1448-
}
1449-
pemKeyFile := p.Args26.Get("net.tls.certificateKeyFile")
1450-
if pemKeyFile != nil {
1451-
additionalParams["sslClientCertificate"] = pemKeyFile.String()
1453+
pemKeyFile := ""
1454+
if pem := p.Args26.Get("net.tls.certificateKeyFile"); pem != nil {
1455+
pemKeyFile = pem.String()
14521456
}
1453-
monitoringVersion.AdditionalParams = additionalParams
1457+
monitoringVersion.AdditionalParams = map[string]string{}
1458+
addTLSParams(monitoringVersion.AdditionalParams, appdbCAFilePath, pemKeyFile)
14541459
}
14551460
log.Debugw("Added monitoring agent configuration", "host", p.HostName, "tls", tls)
14561461
monitoringVersions = append(monitoringVersions, monitoringVersion)
@@ -1459,6 +1464,27 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14591464
ac.MonitoringVersions = monitoringVersions
14601465
}
14611466

1467+
// TLS param keys for monitoring additionalParams.
1468+
const (
1469+
tlsParamUseSsl = "useSslForAllConnections"
1470+
tlsParamTrustedCert = "sslTrustedServerCertificates"
1471+
tlsParamClientCert = "sslClientCertificate"
1472+
)
1473+
1474+
func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) {
1475+
params[tlsParamUseSsl] = "true"
1476+
params[tlsParamTrustedCert] = caFilePath
1477+
if pemKeyFile != "" {
1478+
params[tlsParamClientCert] = pemKeyFile
1479+
}
1480+
}
1481+
1482+
func clearTLSParams(params map[string]string) {
1483+
delete(params, tlsParamUseSsl)
1484+
delete(params, tlsParamTrustedCert)
1485+
delete(params, tlsParamClientCert)
1486+
}
1487+
14621488
// registerAppDBHostsWithProject uses the Hosts API to add each process in the AppDB to the project
14631489
func (r *ReconcileAppDbReplicaSet) registerAppDBHostsWithProject(hostnames []string, conn om.Connection, opsManagerPassword string, log *zap.SugaredLogger) error {
14641490
getHostsResult, err := conn.GetHosts()

controllers/operator/appdbreplicaset_controller_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,3 +1457,77 @@ func createRunningAppDB(ctx context.Context, t *testing.T, startingMembers int,
14571457
assert.Equal(t, ok, res)
14581458
return reconciler
14591459
}
1460+
1461+
// TestClearTLSParams tests CLOUDP-351614 fix:
1462+
// When TLS is disabled on AppDB, TLS-specific params should be cleared from
1463+
// the monitoring config's additionalParams to prevent the monitoring agent
1464+
// from trying to use certificate files that no longer exist.
1465+
func TestClearTLSParams(t *testing.T) {
1466+
tests := []struct {
1467+
name string
1468+
input map[string]string
1469+
expectedOutput map[string]string
1470+
}{
1471+
{
1472+
name: "nil map",
1473+
input: nil,
1474+
expectedOutput: nil,
1475+
},
1476+
{
1477+
name: "empty map",
1478+
input: map[string]string{},
1479+
expectedOutput: map[string]string{},
1480+
},
1481+
{
1482+
name: "only TLS params",
1483+
input: map[string]string{
1484+
"useSslForAllConnections": "true",
1485+
"sslTrustedServerCertificates": "/some/path/ca.pem",
1486+
"sslClientCertificate": "/some/path/cert.pem",
1487+
},
1488+
expectedOutput: map[string]string{},
1489+
},
1490+
{
1491+
name: "mixed params - TLS and non-TLS",
1492+
input: map[string]string{
1493+
"useSslForAllConnections": "true",
1494+
"sslTrustedServerCertificates": "/some/path/ca.pem",
1495+
"sslClientCertificate": "/some/path/cert.pem",
1496+
"someOtherParam": "someValue",
1497+
"anotherParam": "anotherValue",
1498+
},
1499+
expectedOutput: map[string]string{
1500+
"someOtherParam": "someValue",
1501+
"anotherParam": "anotherValue",
1502+
},
1503+
},
1504+
{
1505+
name: "only non-TLS params",
1506+
input: map[string]string{
1507+
"someOtherParam": "someValue",
1508+
"anotherParam": "anotherValue",
1509+
},
1510+
expectedOutput: map[string]string{
1511+
"someOtherParam": "someValue",
1512+
"anotherParam": "anotherValue",
1513+
},
1514+
},
1515+
{
1516+
name: "partial TLS params",
1517+
input: map[string]string{
1518+
"useSslForAllConnections": "true",
1519+
"someOtherParam": "someValue",
1520+
},
1521+
expectedOutput: map[string]string{
1522+
"someOtherParam": "someValue",
1523+
},
1524+
},
1525+
}
1526+
1527+
for _, tt := range tests {
1528+
t.Run(tt.name, func(t *testing.T) {
1529+
clearTLSParams(tt.input)
1530+
assert.Equal(t, tt.expectedOutput, tt.input)
1531+
})
1532+
}
1533+
}

controllers/operator/common_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1075,7 +1075,7 @@ func ReconcileReplicaSetAC(ctx context.Context, d om.Deployment, spec mdbv1.DbCo
10751075
}
10761076

10771077
d.MergeReplicaSet(rs, spec.GetAdditionalMongodConfig().ToMap(), lastMongodConfig, log)
1078-
d.AddMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath)
1078+
d.ConfigureMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath)
10791079
d.ConfigureTLS(spec.GetSecurity(), caFilePath)
10801080
d.ConfigureInternalClusterAuthentication(rs.GetProcessNames(), spec.GetSecurity().GetInternalClusterAuthenticationMode(), internalClusterPath)
10811081

controllers/operator/mongodbshardedcluster_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2006,7 +2006,7 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c
20062006
return err
20072007
}
20082008

2009-
d.AddMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath)
2009+
d.ConfigureMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath)
20102010
d.ConfigureTLS(sc.Spec.GetSecurity(), opts.caFilePath)
20112011

20122012
setupInternalClusterAuth(d, sc.Name, sc.GetSecurity().GetInternalClusterAuthenticationMode(),

controllers/operator/mongodbshardedcluster_controller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1676,7 +1676,7 @@ func createDeploymentFromShardedCluster(t *testing.T, updatable v1.CustomResourc
16761676
Finalizing: false,
16771677
})
16781678
assert.NoError(t, err)
1679-
d.AddMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
1679+
d.ConfigureMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
16801680
return d
16811681
}
16821682

0 commit comments

Comments
 (0)