Skip to content

Commit 2d967f1

Browse files
nammnclaude
andcommitted
CLOUDP-351614: Fix monitoring failure after disabling TLS on AppDB
When TLS is disabled on AppDB, the operator now correctly clears stale TLS params from the monitoring configuration. This prevents the monitoring agent from trying to use certificate files that no longer exist. Changes: - Rename AddMonitoringAndBackup to ConfigureMonitoringAndBackup (better name) - Clear only TLS-specific fields (useSslForAllConnections, sslTrustedServerCertificates, sslClientCertificate) from additionalParams - Only remove additionalParams map if empty after clearing TLS fields - Fix applied to both deployment.go (regular MongoDB) and appdbreplicaset_controller.go (AppDB) - Add unit tests for the TLS param clearing functions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 2bf57af commit 2d967f1

12 files changed

+229
-27
lines changed

.evergreen.yml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,21 @@ buildvariants:
14391439
tags: [ "pr_patch", "staging", "e2e_test_suite", "static" ]
14401440
run_on:
14411441
- ubuntu2404-large
1442-
<<: *base_om8_dependency
1442+
depends_on:
1443+
- name: build_om_images
1444+
variant: build_om80_images
1445+
- name: build_operator_ubi
1446+
variant: init_test_run
1447+
- name: build_init_database_image_ubi
1448+
variant: init_test_run
1449+
- name: build_test_image
1450+
variant: init_test_run
1451+
- name: build_init_appdb_images_ubi
1452+
variant: init_test_run
1453+
- name: build_init_om_images_ubi
1454+
variant: init_test_run
1455+
- name: publish_helm_chart
1456+
variant: init_test_run
14431457
tasks:
14441458
- name: e2e_static_ops_manager_kind_only_task_group
14451459
- name: e2e_static_ops_manager_kind_6_0_only_task_group
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
kind: fix
3+
date: 2025-12-16
4+
---
5+
6+
* Fixed an issue where monitoring agents would fail after disabling TLS on a MongoDB deployment.

controllers/om/deployment.go

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -251,15 +251,16 @@ func (d Deployment) MergeShardedCluster(opts DeploymentShardedClusterMergeOption
251251
return shardsScheduledForRemoval, nil
252252
}
253253

254-
// AddMonitoringAndBackup adds monitoring and backup agents to each process
255-
// The automation agent will update the agents versions to the latest version automatically
254+
// ConfigureMonitoringAndBackup configures monitoring and backup agents for each process.
255+
// This is called on every reconcile to ensure the monitoring/backup config matches the desired state.
256+
// The automation agent will update the agents versions to the latest version automatically.
256257
// Note, that these two are deliberately combined as all clients (standalone, rs etc.) need both backup and monitoring
257-
// together
258-
func (d Deployment) AddMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
258+
// together.
259+
func (d Deployment) ConfigureMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
259260
if len(d.getProcesses()) == 0 {
260261
return
261262
}
262-
d.AddMonitoring(log, tls, caFilepath)
263+
d.ConfigureMonitoring(log, tls, caFilepath)
263264
d.addBackup(log)
264265
}
265266

@@ -277,8 +278,9 @@ func (d Deployment) GetReplicaSetByName(name string) ReplicaSet {
277278
return nil
278279
}
279280

280-
// AddMonitoring adds monitoring agents for all processes in the deployment
281-
func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
281+
// ConfigureMonitoring configures monitoring agents for all processes in the deployment.
282+
// This is called on every reconcile to ensure the monitoring config matches the desired state.
283+
func (d Deployment) ConfigureMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
282284
if len(d.getProcesses()) == 0 {
283285
return
284286
}
@@ -307,22 +309,62 @@ func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath s
307309

308310
if tls {
309311
additionalParams := map[string]string{
310-
"useSslForAllConnections": "true",
311-
"sslTrustedServerCertificates": caFilePath,
312+
tlsParamUseSslForAllConnections: "true",
313+
tlsParamSslTrustedServerCertificates: caFilePath,
312314
}
313315

314316
pemKeyFile := p.EnsureTLSConfig()["PEMKeyFile"]
315317
if pemKeyFile != nil {
316-
additionalParams["sslClientCertificate"] = pemKeyFile.(string)
318+
additionalParams[tlsParamSslClientCertificate] = pemKeyFile.(string)
317319
}
318320

319321
monitoringVersion["additionalParams"] = additionalParams
322+
} else {
323+
// Clear TLS-specific params when TLS is disabled to prevent monitoring from
324+
// trying to use certificate files that no longer exist.
325+
// We only clear TLS fields, preserving any other additionalParams that may be set.
326+
clearTLSParamsFromMonitoringVersion(monitoringVersion)
320327
}
321328

322329
}
323330
d.setMonitoringVersions(monitoringVersions)
324331
}
325332

333+
// TLS parameter keys for monitoring agent additionalParams.
334+
// These constants are used both when adding TLS params (when TLS is enabled)
335+
// and when clearing them (when TLS is disabled).
336+
const (
337+
tlsParamUseSslForAllConnections = "useSslForAllConnections"
338+
tlsParamSslTrustedServerCertificates = "sslTrustedServerCertificates"
339+
tlsParamSslClientCertificate = "sslClientCertificate"
340+
)
341+
342+
// tlsAdditionalParams are the TLS-specific fields in additionalParams that should be
343+
// cleared when TLS is disabled on a deployment.
344+
var tlsAdditionalParams = []string{
345+
tlsParamUseSslForAllConnections,
346+
tlsParamSslTrustedServerCertificates,
347+
tlsParamSslClientCertificate,
348+
}
349+
350+
// clearTLSParamsFromMonitoringVersion removes TLS-specific fields from the monitoring
351+
// version's additionalParams. If additionalParams becomes empty after removing TLS fields,
352+
// the entire additionalParams map is removed.
353+
func clearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) {
354+
additionalParams, ok := monitoringVersion["additionalParams"].(map[string]string)
355+
if !ok {
356+
return
357+
}
358+
359+
for _, param := range tlsAdditionalParams {
360+
delete(additionalParams, param)
361+
}
362+
363+
if len(additionalParams) == 0 {
364+
delete(monitoringVersion, "additionalParams")
365+
}
366+
}
367+
326368
// RemoveMonitoringAndBackup removes both monitoring and backup agent configurations. This must be called when the
327369
// Mongodb resource is being removed, otherwise UI will show non-existing agents in the "servers" tab
328370
func (d Deployment) RemoveMonitoringAndBackup(names []string, log *zap.SugaredLogger) {

controllers/om/deployment/testing_utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func CreateFromReplicaSet(mongoDBImage string, forceEnterprise bool, rs *mdb.Mon
3737
lastConfig.ToMap(),
3838
zap.S(),
3939
)
40-
d.AddMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
40+
d.ConfigureMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
4141
d.ConfigureTLS(rs.Spec.GetSecurity(), util.CAFilePathInContainer)
4242
return d
4343
}

controllers/om/deployment_test.go

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -489,12 +489,12 @@ func TestConfiguringTlsProcessFromOpsManager(t *testing.T) {
489489
}
490490
}
491491

492-
func TestAddMonitoring(t *testing.T) {
492+
func TestConfigureMonitoring(t *testing.T) {
493493
d := NewDeployment()
494494

495495
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
496496
d.MergeReplicaSet(rs0, nil, nil, zap.S())
497-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
497+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
498498

499499
expectedMonitoringVersions := []interface{}{
500500
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
@@ -504,16 +504,16 @@ func TestAddMonitoring(t *testing.T) {
504504
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
505505

506506
// adding again - nothing changes
507-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
507+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
508508
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
509509
}
510510

511-
func TestAddMonitoringTls(t *testing.T) {
511+
func TestConfigureMonitoringTls(t *testing.T) {
512512
d := NewDeployment()
513513

514514
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
515515
d.MergeReplicaSet(rs0, nil, nil, zap.S())
516-
d.AddMonitoring(zap.S(), true, util.CAFilePathInContainer)
516+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
517517

518518
expectedAdditionalParams := map[string]string{
519519
"useSslForAllConnections": "true",
@@ -528,10 +528,39 @@ func TestAddMonitoringTls(t *testing.T) {
528528
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
529529

530530
// adding again - nothing changes
531-
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
531+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
532532
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
533533
}
534534

535+
func TestConfigureMonitoringTLSDisable(t *testing.T) {
536+
d := NewDeployment()
537+
538+
rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
539+
d.MergeReplicaSet(rs0, nil, nil, zap.S())
540+
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
541+
542+
// verify TLS is present in additionalParams
543+
expectedAdditionalParams := map[string]string{
544+
"useSslForAllConnections": "true",
545+
"sslTrustedServerCertificates": util.CAFilePathInContainer,
546+
}
547+
expectedMonitoringVersionsWithTls := []interface{}{
548+
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
549+
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
550+
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
551+
}
552+
assert.Equal(t, expectedMonitoringVersionsWithTls, d.getMonitoringVersions())
553+
554+
// disabling TLS should clear additionalParams (CLOUDP-351614)
555+
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
556+
expectedMonitoringVersionsWithoutTls := []interface{}{
557+
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
558+
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion},
559+
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion},
560+
}
561+
assert.Equal(t, expectedMonitoringVersionsWithoutTls, d.getMonitoringVersions())
562+
}
563+
535564
func TestAddBackup(t *testing.T) {
536565
d := NewDeployment()
537566

controllers/operator/appdbreplicaset_controller.go

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,9 +1430,18 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14301430
monitoringVersions := ac.MonitoringVersions
14311431
for _, p := range ac.Processes {
14321432
found := false
1433-
for _, m := range monitoringVersions {
1433+
for i, m := range monitoringVersions {
14341434
if m.Hostname == p.HostName {
14351435
found = true
1436+
if !tls {
1437+
// Clear TLS-specific params when TLS is disabled to prevent monitoring from
1438+
// trying to use certificate files that no longer exist.
1439+
// We only clear TLS fields, preserving any other additionalParams that may be set.
1440+
clearTLSParamsFromAdditionalParams(monitoringVersions[i].AdditionalParams)
1441+
if len(monitoringVersions[i].AdditionalParams) == 0 {
1442+
monitoringVersions[i].AdditionalParams = nil
1443+
}
1444+
}
14361445
break
14371446
}
14381447
}
@@ -1443,12 +1452,12 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14431452
}
14441453
if tls {
14451454
additionalParams := map[string]string{
1446-
"useSslForAllConnections": "true",
1447-
"sslTrustedServerCertificates": appdbCAFilePath,
1455+
tlsParamUseSslForAllConnections: "true",
1456+
tlsParamSslTrustedServerCertificates: appdbCAFilePath,
14481457
}
14491458
pemKeyFile := p.Args26.Get("net.tls.certificateKeyFile")
14501459
if pemKeyFile != nil {
1451-
additionalParams["sslClientCertificate"] = pemKeyFile.String()
1460+
additionalParams[tlsParamSslClientCertificate] = pemKeyFile.String()
14521461
}
14531462
monitoringVersion.AdditionalParams = additionalParams
14541463
}
@@ -1459,6 +1468,34 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
14591468
ac.MonitoringVersions = monitoringVersions
14601469
}
14611470

1471+
// TLS parameter keys for monitoring agent additionalParams.
1472+
// These constants are used both when adding TLS params (when TLS is enabled)
1473+
// and when clearing them (when TLS is disabled).
1474+
const (
1475+
tlsParamUseSslForAllConnections = "useSslForAllConnections"
1476+
tlsParamSslTrustedServerCertificates = "sslTrustedServerCertificates"
1477+
tlsParamSslClientCertificate = "sslClientCertificate"
1478+
)
1479+
1480+
// tlsAdditionalParams are the TLS-specific fields in additionalParams that should be
1481+
// cleared when TLS is disabled on an AppDB deployment.
1482+
var tlsAdditionalParams = []string{
1483+
tlsParamUseSslForAllConnections,
1484+
tlsParamSslTrustedServerCertificates,
1485+
tlsParamSslClientCertificate,
1486+
}
1487+
1488+
// clearTLSParamsFromAdditionalParams removes TLS-specific fields from the monitoring
1489+
// version's additionalParams map.
1490+
func clearTLSParamsFromAdditionalParams(additionalParams map[string]string) {
1491+
if additionalParams == nil {
1492+
return
1493+
}
1494+
for _, param := range tlsAdditionalParams {
1495+
delete(additionalParams, param)
1496+
}
1497+
}
1498+
14621499
// registerAppDBHostsWithProject uses the Hosts API to add each process in the AppDB to the project
14631500
func (r *ReconcileAppDbReplicaSet) registerAppDBHostsWithProject(hostnames []string, conn om.Connection, opsManagerPassword string, log *zap.SugaredLogger) error {
14641501
getHostsResult, err := conn.GetHosts()

controllers/operator/appdbreplicaset_controller_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,3 +1457,77 @@ func createRunningAppDB(ctx context.Context, t *testing.T, startingMembers int,
14571457
assert.Equal(t, ok, res)
14581458
return reconciler
14591459
}
1460+
1461+
// TestClearTLSParamsFromAdditionalParams tests CLOUDP-351614 fix:
1462+
// When TLS is disabled on AppDB, TLS-specific params should be cleared from
1463+
// the monitoring config's additionalParams to prevent the monitoring agent
1464+
// from trying to use certificate files that no longer exist.
1465+
func TestClearTLSParamsFromAdditionalParams(t *testing.T) {
1466+
tests := []struct {
1467+
name string
1468+
input map[string]string
1469+
expectedOutput map[string]string
1470+
}{
1471+
{
1472+
name: "nil map",
1473+
input: nil,
1474+
expectedOutput: nil,
1475+
},
1476+
{
1477+
name: "empty map",
1478+
input: map[string]string{},
1479+
expectedOutput: map[string]string{},
1480+
},
1481+
{
1482+
name: "only TLS params",
1483+
input: map[string]string{
1484+
"useSslForAllConnections": "true",
1485+
"sslTrustedServerCertificates": "/some/path/ca.pem",
1486+
"sslClientCertificate": "/some/path/cert.pem",
1487+
},
1488+
expectedOutput: map[string]string{},
1489+
},
1490+
{
1491+
name: "mixed params - TLS and non-TLS",
1492+
input: map[string]string{
1493+
"useSslForAllConnections": "true",
1494+
"sslTrustedServerCertificates": "/some/path/ca.pem",
1495+
"sslClientCertificate": "/some/path/cert.pem",
1496+
"someOtherParam": "someValue",
1497+
"anotherParam": "anotherValue",
1498+
},
1499+
expectedOutput: map[string]string{
1500+
"someOtherParam": "someValue",
1501+
"anotherParam": "anotherValue",
1502+
},
1503+
},
1504+
{
1505+
name: "only non-TLS params",
1506+
input: map[string]string{
1507+
"someOtherParam": "someValue",
1508+
"anotherParam": "anotherValue",
1509+
},
1510+
expectedOutput: map[string]string{
1511+
"someOtherParam": "someValue",
1512+
"anotherParam": "anotherValue",
1513+
},
1514+
},
1515+
{
1516+
name: "partial TLS params",
1517+
input: map[string]string{
1518+
"useSslForAllConnections": "true",
1519+
"someOtherParam": "someValue",
1520+
},
1521+
expectedOutput: map[string]string{
1522+
"someOtherParam": "someValue",
1523+
},
1524+
},
1525+
}
1526+
1527+
for _, tt := range tests {
1528+
t.Run(tt.name, func(t *testing.T) {
1529+
clearTLSParamsFromAdditionalParams(tt.input)
1530+
assert.Equal(t, tt.expectedOutput, tt.input)
1531+
})
1532+
}
1533+
}

controllers/operator/common_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1075,7 +1075,7 @@ func ReconcileReplicaSetAC(ctx context.Context, d om.Deployment, spec mdbv1.DbCo
10751075
}
10761076

10771077
d.MergeReplicaSet(rs, spec.GetAdditionalMongodConfig().ToMap(), lastMongodConfig, log)
1078-
d.AddMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath)
1078+
d.ConfigureMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath)
10791079
d.ConfigureTLS(spec.GetSecurity(), caFilePath)
10801080
d.ConfigureInternalClusterAuthentication(rs.GetProcessNames(), spec.GetSecurity().GetInternalClusterAuthenticationMode(), internalClusterPath)
10811081

controllers/operator/mongodbshardedcluster_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2006,7 +2006,7 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c
20062006
return err
20072007
}
20082008

2009-
d.AddMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath)
2009+
d.ConfigureMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath)
20102010
d.ConfigureTLS(sc.Spec.GetSecurity(), opts.caFilePath)
20112011

20122012
setupInternalClusterAuth(d, sc.Name, sc.GetSecurity().GetInternalClusterAuthenticationMode(),

controllers/operator/mongodbshardedcluster_controller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1676,7 +1676,7 @@ func createDeploymentFromShardedCluster(t *testing.T, updatable v1.CustomResourc
16761676
Finalizing: false,
16771677
})
16781678
assert.NoError(t, err)
1679-
d.AddMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
1679+
d.ConfigureMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
16801680
return d
16811681
}
16821682

0 commit comments

Comments
 (0)