Skip to content

Commit 65c2ce2

Browse files
nammnclaude
andcommitted
CLOUDP-351614: Fix monitoring failure after disabling TLS on AppDB
When TLS is disabled on AppDB, the operator now correctly clears stale TLS params from the monitoring configuration. This prevents the monitoring agent from trying to use certificate files that no longer exist. Changes: - Rename AddMonitoringAndBackup to ConfigureMonitoringAndBackup (better name) - Clear only TLS-specific fields (useSslForAllConnections, sslTrustedServerCertificates, sslClientCertificate) from additionalParams - Only remove additionalParams map if empty after clearing TLS fields - Fix applied to both deployment.go (regular MongoDB) and appdbreplicaset_controller.go (AppDB) - Add unit tests for the TLS param clearing functions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 2bf57af commit 65c2ce2

12 files changed

+229
-39
lines changed

.evergreen.yml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,21 @@ buildvariants:
14391439
tags: [ "pr_patch", "staging", "e2e_test_suite", "static" ]
14401440
run_on:
14411441
- ubuntu2404-large
1442-
<<: *base_om8_dependency
1442+
depends_on:
1443+
- name: build_om_images
1444+
variant: build_om80_images
1445+
- name: build_operator_ubi
1446+
variant: init_test_run
1447+
- name: build_init_database_image_ubi
1448+
variant: init_test_run
1449+
- name: build_test_image
1450+
variant: init_test_run
1451+
- name: build_init_appdb_images_ubi
1452+
variant: init_test_run
1453+
- name: build_init_om_images_ubi
1454+
variant: init_test_run
1455+
- name: publish_helm_chart
1456+
variant: init_test_run
14431457
tasks:
14441458
- name: e2e_static_ops_manager_kind_only_task_group
14451459
- name: e2e_static_ops_manager_kind_6_0_only_task_group
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
kind: fix
3+
date: 2025-12-16
4+
---
5+
6+
* Fixed an issue where monitoring agents would fail after disabling TLS on a MongoDB deployment.

controllers/om/deployment.go

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -251,15 +251,16 @@ func (d Deployment) MergeShardedCluster(opts DeploymentShardedClusterMergeOption
251251
return shardsScheduledForRemoval, nil
252252
}
253253

254-
// AddMonitoringAndBackup adds monitoring and backup agents to each process
255-
// The automation agent will update the agents versions to the latest version automatically
254+
// ConfigureMonitoringAndBackup configures monitoring and backup agents for each process.
255+
// This is called on every reconcile to ensure the monitoring/backup config matches the desired state.
256+
// The automation agent will update the agents versions to the latest version automatically.
256257
// Note, that these two are deliberately combined as all clients (standalone, rs etc.) need both backup and monitoring
257-
// together
258-
func (d Deployment) AddMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
258+
// together.
259+
func (d Deployment) ConfigureMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
259260
if len(d.getProcesses()) == 0 {
260261
return
261262
}
262-
d.AddMonitoring(log, tls, caFilepath)
263+
d.ConfigureMonitoring(log, tls, caFilepath)
263264
d.addBackup(log)
264265
}
265266

@@ -277,8 +278,9 @@ func (d Deployment) GetReplicaSetByName(name string) ReplicaSet {
277278
return nil
278279
}
279280

280-
// AddMonitoring adds monitoring agents for all processes in the deployment
281-
func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
281+
// ConfigureMonitoring configures monitoring agents for all processes in the deployment.
282+
// This is called on every reconcile to ensure the monitoring config matches the desired state.
283+
func (d Deployment) ConfigureMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
282284
if len(d.getProcesses()) == 0 {
283285
return
284286
}
@@ -306,23 +308,56 @@ func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath s
306308
monitoringVersion["hostname"] = p.HostName()
307309

308310
if tls {
309-
additionalParams := map[string]string{
310-
"useSslForAllConnections": "true",
311-
"sslTrustedServerCertificates": caFilePath,
311+
pemKeyFile := ""
312+
if pem := p.EnsureTLSConfig()["PEMKeyFile"]; pem != nil {
313+
pemKeyFile = pem.(string)
312314
}
313-
314-
pemKeyFile := p.EnsureTLSConfig()["PEMKeyFile"]
315-
if pemKeyFile != nil {
316-
additionalParams["sslClientCertificate"] = pemKeyFile.(string)
317-
}
318-
319-
monitoringVersion["additionalParams"] = additionalParams
315+
monitoringVersion["additionalParams"] = buildTLSAdditionalParams(caFilePath, pemKeyFile)
316+
} else {
317+
// Clear TLS-specific params when TLS is disabled to prevent monitoring from
318+
// trying to use certificate files that no longer exist.
319+
// We only clear TLS fields, preserving any other additionalParams that may be set.
320+
clearTLSParamsFromMonitoringVersion(monitoringVersion)
320321
}
321322

322323
}
323324
d.setMonitoringVersions(monitoringVersions)
324325
}
325326

327+
// buildTLSAdditionalParams creates the additionalParams map for monitoring with TLS enabled.
328+
// This is the single source of truth for TLS params - clearTLSParamsFromMonitoringVersion
329+
// uses this function to determine which keys to remove.
330+
func buildTLSAdditionalParams(caFilePath string, pemKeyFile string) map[string]string {
331+
params := map[string]string{
332+
"useSslForAllConnections": "true",
333+
"sslTrustedServerCertificates": caFilePath,
334+
}
335+
if pemKeyFile != "" {
336+
params["sslClientCertificate"] = pemKeyFile
337+
}
338+
return params
339+
}
340+
341+
// clearTLSParamsFromMonitoringVersion removes TLS-specific fields from the monitoring
342+
// version's additionalParams. It derives which keys to remove from buildTLSAdditionalParams,
343+
// ensuring add and remove operations always stay in sync.
344+
// If additionalParams becomes empty after removing TLS fields, the entire map is removed.
345+
func clearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) {
346+
additionalParams, ok := monitoringVersion["additionalParams"].(map[string]string)
347+
if !ok {
348+
return
349+
}
350+
351+
// Use non-empty dummy values to get all possible TLS param keys
352+
for key := range buildTLSAdditionalParams("_", "_") {
353+
delete(additionalParams, key)
354+
}
355+
356+
if len(additionalParams) == 0 {
357+
delete(monitoringVersion, "additionalParams")
358+
}
359+
}
360+
326361
// RemoveMonitoringAndBackup removes both monitoring and backup agent configurations. This must be called when the
327362
// Mongodb resource is being removed, otherwise UI will show non-existing agents in the "servers" tab
328363
func (d Deployment) RemoveMonitoringAndBackup(names []string, log *zap.SugaredLogger) {

controllers/om/deployment/testing_utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func CreateFromReplicaSet(mongoDBImage string, forceEnterprise bool, rs *mdb.Mon
3737
lastConfig.ToMap(),
3838
zap.S(),
3939
)
40-
d.AddMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
40+
d.ConfigureMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
4141
d.ConfigureTLS(rs.Spec.GetSecurity(), util.CAFilePathInContainer)
4242
return d
4343
}

controllers/om/deployment_test.go

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -489,12 +489,12 @@ func TestConfiguringTlsProcessFromOpsManager(t *testing.T) {
489489
}
490490
}
491491

492-
func TestAddMonitoring(t *testing.T) {
492+
func TestConfigureMonitoring(t *testing.T) {
493493
d := NewDeployment()
494494

495495
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
496496
d.MergeReplicaSet(rs0, nil, nil, zap.S())
497-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
497+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
498498

499499
expectedMonitoringVersions := []interface{}{
500500
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
@@ -504,16 +504,16 @@ func TestAddMonitoring(t *testing.T) {
504504
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
505505

506506
// adding again - nothing changes
507-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
507+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
508508
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
509509
}
510510

511-
func TestAddMonitoringTls(t *testing.T) {
511+
func TestConfigureMonitoringTls(t *testing.T) {
512512
d := NewDeployment()
513513

514514
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
515515
d.MergeReplicaSet(rs0, nil, nil, zap.S())
516-
d.AddMonitoring(zap.S(), true, util.CAFilePathInContainer)
516+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
517517

518518
expectedAdditionalParams := map[string]string{
519519
"useSslForAllConnections": "true",
@@ -528,10 +528,39 @@ func TestAddMonitoringTls(t *testing.T) {
528528
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
529529

530530
// adding again - nothing changes
531-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
531+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
532532
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
533533
}
534534

535+
func TestConfigureMonitoringTLSDisable(t *testing.T) {
536+
d := NewDeployment()
537+
538+
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
539+
d.MergeReplicaSet(rs0, nil, nil, zap.S())
540+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
541+
542+
// verify TLS is present in additionalParams
543+
expectedAdditionalParams := map[string]string{
544+
"useSslForAllConnections": "true",
545+
"sslTrustedServerCertificates": util.CAFilePathInContainer,
546+
}
547+
expectedMonitoringVersionsWithTls := []interface{}{
548+
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
549+
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
550+
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
551+
}
552+
assert.Equal(t, expectedMonitoringVersionsWithTls, d.getMonitoringVersions())
553+
554+
// disabling TLS should clear additionalParams (CLOUDP-351614)
555+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
556+
expectedMonitoringVersionsWithoutTls := []interface{}{
557+
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
558+
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion},
559+
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion},
560+
}
561+
assert.Equal(t, expectedMonitoringVersionsWithoutTls, d.getMonitoringVersions())
562+
}
563+
535564
func TestAddBackup(t *testing.T) {
536565
d := NewDeployment()
537566

controllers/operator/appdbreplicaset_controller.go

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,9 +1430,18 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14301430
monitoringVersions := ac.MonitoringVersions
14311431
for _, p := range ac.Processes {
14321432
found := false
1433-
for _, m := range monitoringVersions {
1433+
for i, m := range monitoringVersions {
14341434
if m.Hostname == p.HostName {
14351435
found = true
1436+
if !tls {
1437+
// Clear TLS-specific params when TLS is disabled to prevent monitoring from
1438+
// trying to use certificate files that no longer exist.
1439+
// We only clear TLS fields, preserving any other additionalParams that may be set.
1440+
clearTLSParamsFromAdditionalParams(monitoringVersions[i].AdditionalParams)
1441+
if len(monitoringVersions[i].AdditionalParams) == 0 {
1442+
monitoringVersions[i].AdditionalParams = nil
1443+
}
1444+
}
14361445
break
14371446
}
14381447
}
@@ -1442,15 +1451,11 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14421451
Name: om.MonitoringAgentDefaultVersion,
14431452
}
14441453
if tls {
1445-
additionalParams := map[string]string{
1446-
"useSslForAllConnections": "true",
1447-
"sslTrustedServerCertificates": appdbCAFilePath,
1448-
}
1449-
pemKeyFile := p.Args26.Get("net.tls.certificateKeyFile")
1450-
if pemKeyFile != nil {
1451-
additionalParams["sslClientCertificate"] = pemKeyFile.String()
1454+
pemKeyFile := ""
1455+
if pem := p.Args26.Get("net.tls.certificateKeyFile"); pem != nil {
1456+
pemKeyFile = pem.String()
14521457
}
1453-
monitoringVersion.AdditionalParams = additionalParams
1458+
monitoringVersion.AdditionalParams = buildTLSAdditionalParams(appdbCAFilePath, pemKeyFile)
14541459
}
14551460
log.Debugw("Added monitoring agent configuration", "host", p.HostName, "tls", tls)
14561461
monitoringVersions = append(monitoringVersions, monitoringVersion)
@@ -1459,6 +1464,33 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14591464
ac.MonitoringVersions = monitoringVersions
14601465
}
14611466

1467+
// buildTLSAdditionalParams creates the additionalParams map for monitoring with TLS enabled.
1468+
// This is the single source of truth for TLS params - clearTLSParamsFromAdditionalParams
1469+
// uses this function to determine which keys to remove.
1470+
func buildTLSAdditionalParams(caFilePath string, pemKeyFile string) map[string]string {
1471+
params := map[string]string{
1472+
"useSslForAllConnections": "true",
1473+
"sslTrustedServerCertificates": caFilePath,
1474+
}
1475+
if pemKeyFile != "" {
1476+
params["sslClientCertificate"] = pemKeyFile
1477+
}
1478+
return params
1479+
}
1480+
1481+
// clearTLSParamsFromAdditionalParams removes TLS-specific fields from the monitoring
1482+
// version's additionalParams map. It derives which keys to remove from buildTLSAdditionalParams,
1483+
// ensuring add and remove operations always stay in sync.
1484+
func clearTLSParamsFromAdditionalParams(additionalParams map[string]string) {
1485+
if additionalParams == nil {
1486+
return
1487+
}
1488+
// Use non-empty dummy values to get all possible TLS param keys
1489+
for key := range buildTLSAdditionalParams("_", "_") {
1490+
delete(additionalParams, key)
1491+
}
1492+
}
1493+
14621494
// registerAppDBHostsWithProject uses the Hosts API to add each process in the AppDB to the project
14631495
func (r *ReconcileAppDbReplicaSet) registerAppDBHostsWithProject(hostnames []string, conn om.Connection, opsManagerPassword string, log *zap.SugaredLogger) error {
14641496
getHostsResult, err := conn.GetHosts()

controllers/operator/appdbreplicaset_controller_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,3 +1457,77 @@ func createRunningAppDB(ctx context.Context, t *testing.T, startingMembers int,
14571457
assert.Equal(t, ok, res)
14581458
return reconciler
14591459
}
1460+
1461+
// TestClearTLSParamsFromAdditionalParams tests CLOUDP-351614 fix:
1462+
// When TLS is disabled on AppDB, TLS-specific params should be cleared from
1463+
// the monitoring config's additionalParams to prevent the monitoring agent
1464+
// from trying to use certificate files that no longer exist.
1465+
func TestClearTLSParamsFromAdditionalParams(t *testing.T) {
1466+
tests := []struct {
1467+
name string
1468+
input map[string]string
1469+
expectedOutput map[string]string
1470+
}{
1471+
{
1472+
name: "nil map",
1473+
input: nil,
1474+
expectedOutput: nil,
1475+
},
1476+
{
1477+
name: "empty map",
1478+
input: map[string]string{},
1479+
expectedOutput: map[string]string{},
1480+
},
1481+
{
1482+
name: "only TLS params",
1483+
input: map[string]string{
1484+
"useSslForAllConnections": "true",
1485+
"sslTrustedServerCertificates": "/some/path/ca.pem",
1486+
"sslClientCertificate": "/some/path/cert.pem",
1487+
},
1488+
expectedOutput: map[string]string{},
1489+
},
1490+
{
1491+
name: "mixed params - TLS and non-TLS",
1492+
input: map[string]string{
1493+
"useSslForAllConnections": "true",
1494+
"sslTrustedServerCertificates": "/some/path/ca.pem",
1495+
"sslClientCertificate": "/some/path/cert.pem",
1496+
"someOtherParam": "someValue",
1497+
"anotherParam": "anotherValue",
1498+
},
1499+
expectedOutput: map[string]string{
1500+
"someOtherParam": "someValue",
1501+
"anotherParam": "anotherValue",
1502+
},
1503+
},
1504+
{
1505+
name: "only non-TLS params",
1506+
input: map[string]string{
1507+
"someOtherParam": "someValue",
1508+
"anotherParam": "anotherValue",
1509+
},
1510+
expectedOutput: map[string]string{
1511+
"someOtherParam": "someValue",
1512+
"anotherParam": "anotherValue",
1513+
},
1514+
},
1515+
{
1516+
name: "partial TLS params",
1517+
input: map[string]string{
1518+
"useSslForAllConnections": "true",
1519+
"someOtherParam": "someValue",
1520+
},
1521+
expectedOutput: map[string]string{
1522+
"someOtherParam": "someValue",
1523+
},
1524+
},
1525+
}
1526+
1527+
for _, tt := range tests {
1528+
t.Run(tt.name, func(t *testing.T) {
1529+
clearTLSParamsFromAdditionalParams(tt.input)
1530+
assert.Equal(t, tt.expectedOutput, tt.input)
1531+
})
1532+
}
1533+
}

controllers/operator/common_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1075,7 +1075,7 @@ func ReconcileReplicaSetAC(ctx context.Context, d om.Deployment, spec mdbv1.DbCo
10751075
}
10761076

10771077
d.MergeReplicaSet(rs, spec.GetAdditionalMongodConfig().ToMap(), lastMongodConfig, log)
1078-
d.AddMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath)
1078+
d.ConfigureMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath)
10791079
d.ConfigureTLS(spec.GetSecurity(), caFilePath)
10801080
d.ConfigureInternalClusterAuthentication(rs.GetProcessNames(), spec.GetSecurity().GetInternalClusterAuthenticationMode(), internalClusterPath)
10811081

controllers/operator/mongodbshardedcluster_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2006,7 +2006,7 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c
20062006
return err
20072007
}
20082008

2009-
d.AddMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath)
2009+
d.ConfigureMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath)
20102010
d.ConfigureTLS(sc.Spec.GetSecurity(), opts.caFilePath)
20112011

20122012
setupInternalClusterAuth(d, sc.Name, sc.GetSecurity().GetInternalClusterAuthenticationMode(),

controllers/operator/mongodbshardedcluster_controller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1676,7 +1676,7 @@ func createDeploymentFromShardedCluster(t *testing.T, updatable v1.CustomResourc
16761676
Finalizing: false,
16771677
})
16781678
assert.NoError(t, err)
1679-
d.AddMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
1679+
d.ConfigureMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
16801680
return d
16811681
}
16821682

0 commit comments

Comments
 (0)