Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions changelog/20251216_fix_tls_monitoring.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
kind: fix
date: 2025-12-16
---

* Fixed an issue where monitoring agents would fail after disabling TLS on a MongoDB deployment.
74 changes: 57 additions & 17 deletions controllers/om/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,15 +251,16 @@ func (d Deployment) MergeShardedCluster(opts DeploymentShardedClusterMergeOption
return shardsScheduledForRemoval, nil
}

// AddMonitoringAndBackup adds monitoring and backup agents to each process
// The automation agent will update the agents versions to the latest version automatically
// ConfigureMonitoringAndBackup configures monitoring and backup agents for each process.
// This is called on every reconcile to ensure the monitoring/backup config matches the desired state.
// The automation agent will update the agents versions to the latest version automatically.
// Note, that these two are deliberately combined as all clients (standalone, rs etc.) need both backup and monitoring
// together
func (d Deployment) AddMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
// together.
func (d Deployment) ConfigureMonitoringAndBackup(log *zap.SugaredLogger, tls bool, caFilepath string) {
if len(d.getProcesses()) == 0 {
return
}
d.AddMonitoring(log, tls, caFilepath)
d.ConfigureMonitoring(log, tls, caFilepath)
d.addBackup(log)
}

Expand All @@ -277,8 +278,9 @@ func (d Deployment) GetReplicaSetByName(name string) ReplicaSet {
return nil
}

// AddMonitoring adds monitoring agents for all processes in the deployment
func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
// ConfigureMonitoring configures monitoring agents for all processes in the deployment.
// This is called on every reconcile to ensure the monitoring config matches the desired state.
func (d Deployment) ConfigureMonitoring(log *zap.SugaredLogger, tls bool, caFilePath string) {
if len(d.getProcesses()) == 0 {
return
}
Expand Down Expand Up @@ -306,23 +308,61 @@ func (d Deployment) AddMonitoring(log *zap.SugaredLogger, tls bool, caFilePath s
monitoringVersion["hostname"] = p.HostName()

if tls {
additionalParams := map[string]string{
"useSslForAllConnections": "true",
"sslTrustedServerCertificates": caFilePath,
pemKeyFile := ""
if pem := p.EnsureTLSConfig()["PEMKeyFile"]; pem != nil {
pemKeyFile = pem.(string)
}

pemKeyFile := p.EnsureTLSConfig()["PEMKeyFile"]
if pemKeyFile != nil {
additionalParams["sslClientCertificate"] = pemKeyFile.(string)
}

monitoringVersion["additionalParams"] = additionalParams
params := map[string]string{}
addTLSParams(params, caFilePath, pemKeyFile)
monitoringVersion["additionalParams"] = params
} else {
// Clear TLS-specific params when TLS is disabled to prevent monitoring from
// trying to use certificate files that no longer exist.
// We only clear TLS fields, preserving any other additionalParams that may be set.
clearTLSParamsFromMonitoringVersion(monitoringVersion)
}

}
d.setMonitoringVersions(monitoringVersions)
}

// TLS param keys for monitoring additionalParams.
const (
tlsParamUseSsl = "useSslForAllConnections"
tlsParamTrustedCert = "sslTrustedServerCertificates"
tlsParamClientCert = "sslClientCertificate"
)

func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) {
params[tlsParamUseSsl] = "true"
params[tlsParamTrustedCert] = caFilePath
if pemKeyFile != "" {
params[tlsParamClientCert] = pemKeyFile
}
}

func clearTLSParams(params map[string]string) {
delete(params, tlsParamUseSsl)
delete(params, tlsParamTrustedCert)
delete(params, tlsParamClientCert)
}

// clearTLSParamsFromMonitoringVersion removes TLS-specific fields from the monitoring
// version's additionalParams. If additionalParams becomes empty after removing TLS fields,
// the entire map is removed.
func clearTLSParamsFromMonitoringVersion(monitoringVersion map[string]interface{}) {
additionalParams, ok := monitoringVersion["additionalParams"].(map[string]string)
if !ok {
return
}

clearTLSParams(additionalParams)

if len(additionalParams) == 0 {
delete(monitoringVersion, "additionalParams")
}
}

// RemoveMonitoringAndBackup removes both monitoring and backup agent configurations. This must be called when the
// Mongodb resource is being removed, otherwise UI will show non-existing agents in the "servers" tab
func (d Deployment) RemoveMonitoringAndBackup(names []string, log *zap.SugaredLogger) {
Expand Down
2 changes: 1 addition & 1 deletion controllers/om/deployment/testing_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func CreateFromReplicaSet(mongoDBImage string, forceEnterprise bool, rs *mdb.Mon
lastConfig.ToMap(),
zap.S(),
)
d.AddMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
d.ConfigureMonitoringAndBackup(zap.S(), rs.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
d.ConfigureTLS(rs.Spec.GetSecurity(), util.CAFilePathInContainer)
return d
}
41 changes: 35 additions & 6 deletions controllers/om/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,12 +489,12 @@ func TestConfiguringTlsProcessFromOpsManager(t *testing.T) {
}
}

func TestAddMonitoring(t *testing.T) {
func TestConfigureMonitoring(t *testing.T) {
d := NewDeployment()

rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
d.MergeReplicaSet(rs0, nil, nil, zap.S())
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)

expectedMonitoringVersions := []interface{}{
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
Expand All @@ -504,16 +504,16 @@ func TestAddMonitoring(t *testing.T) {
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())

// adding again - nothing changes
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
}

func TestAddMonitoringTls(t *testing.T) {
func TestConfigureMonitoringTls(t *testing.T) {
d := NewDeployment()

rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
d.MergeReplicaSet(rs0, nil, nil, zap.S())
d.AddMonitoring(zap.S(), true, util.CAFilePathInContainer)
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)

expectedAdditionalParams := map[string]string{
"useSslForAllConnections": "true",
Expand All @@ -528,10 +528,39 @@ func TestAddMonitoringTls(t *testing.T) {
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())

// adding again - nothing changes
d.AddMonitoring(zap.S(), false, util.CAFilePathInContainer)
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)
assert.Equal(t, expectedMonitoringVersions, d.getMonitoringVersions())
}

func TestConfigureMonitoringTLSDisable(t *testing.T) {
d := NewDeployment()

rs0 := buildRsByProcesses("my-rs", createReplicaSetProcessesCount(3, "my-rs"))
d.MergeReplicaSet(rs0, nil, nil, zap.S())
d.ConfigureMonitoring(zap.S(), true, util.CAFilePathInContainer)

// verify TLS is present in additionalParams
expectedAdditionalParams := map[string]string{
"useSslForAllConnections": "true",
"sslTrustedServerCertificates": util.CAFilePathInContainer,
}
expectedMonitoringVersionsWithTls := []interface{}{
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion, "additionalParams": expectedAdditionalParams},
}
assert.Equal(t, expectedMonitoringVersionsWithTls, d.getMonitoringVersions())

// disabling TLS should clear additionalParams (CLOUDP-351614)
d.ConfigureMonitoring(zap.S(), false, util.CAFilePathInContainer)
expectedMonitoringVersionsWithoutTls := []interface{}{
map[string]interface{}{"hostname": "my-rs-0.some.host", "name": MonitoringAgentDefaultVersion},
map[string]interface{}{"hostname": "my-rs-1.some.host", "name": MonitoringAgentDefaultVersion},
map[string]interface{}{"hostname": "my-rs-2.some.host", "name": MonitoringAgentDefaultVersion},
}
assert.Equal(t, expectedMonitoringVersionsWithoutTls, d.getMonitoringVersions())
}

func TestAddBackup(t *testing.T) {
d := NewDeployment()

Expand Down
52 changes: 39 additions & 13 deletions controllers/operator/appdbreplicaset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ const (
// Used to convey to the operator to force reconfigure agent. At the moment
// it is used for DR in case of Multi-Cluster AppDB when after a cluster outage
// there is no primary in the AppDB deployment.
ForceReconfigureAnnotation = "mongodb.com/v1.forceReconfigure"

ForceReconfigureAnnotation = "mongodb.com/v1.forceReconfigure"
trueString = "trueString"
ForcedReconfigureAlreadyPerformedAnnotation = "mongodb.com/v1.forceReconfigurePerformed"
)

Expand Down Expand Up @@ -717,7 +717,7 @@ func (r *ReconcileAppDbReplicaSet) ReconcileAppDB(ctx context.Context, opsManage
opsManager.Annotations = map[string]string{}
}

if val, ok := opsManager.Annotations[ForceReconfigureAnnotation]; ok && val == "true" {
if val, ok := opsManager.Annotations[ForceReconfigureAnnotation]; ok && val == trueString {
annotationsToAdd := map[string]string{ForcedReconfigureAlreadyPerformedAnnotation: timeutil.Now()}

err := annotations.SetAnnotations(ctx, opsManager, annotationsToAdd, r.client)
Expand Down Expand Up @@ -1248,7 +1248,7 @@ func (r *ReconcileAppDbReplicaSet) buildAppDbAutomationConfig(ctx context.Contex
// it checks this with the user provided annotation and if the operator has actually performed a force reconfigure already
func shouldPerformForcedReconfigure(annotations map[string]string) bool {
if val, ok := annotations[ForceReconfigureAnnotation]; ok {
if val == "true" {
if val == trueString {
if _, ok := annotations[ForcedReconfigureAlreadyPerformedAnnotation]; !ok {
return true
}
Expand Down Expand Up @@ -1430,9 +1430,17 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
monitoringVersions := ac.MonitoringVersions
for _, p := range ac.Processes {
found := false
for _, m := range monitoringVersions {
for i, m := range monitoringVersions {
if m.Hostname == p.HostName {
found = true
if !tls {
// Clear TLS-specific params when TLS is disabled to prevent monitoring from
// trying to use certificate files that no longer exist.
clearTLSParams(monitoringVersions[i].AdditionalParams)
if len(monitoringVersions[i].AdditionalParams) == 0 {
monitoringVersions[i].AdditionalParams = nil
}
}
break
}
}
Expand All @@ -1442,15 +1450,12 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
Name: om.MonitoringAgentDefaultVersion,
}
if tls {
additionalParams := map[string]string{
"useSslForAllConnections": "true",
"sslTrustedServerCertificates": appdbCAFilePath,
pemKeyFile := ""
if pem := p.Args26.Get("net.tls.certificateKeyFile"); pem != nil {
pemKeyFile = pem.String()
}
pemKeyFile := p.Args26.Get("net.tls.certificateKeyFile")
if pemKeyFile != nil {
additionalParams["sslClientCertificate"] = pemKeyFile.String()
}
monitoringVersion.AdditionalParams = additionalParams
monitoringVersion.AdditionalParams = map[string]string{}
addTLSParams(monitoringVersion.AdditionalParams, appdbCAFilePath, pemKeyFile)
}
log.Debugw("Added monitoring agent configuration", "host", p.HostName, "tls", tls)
monitoringVersions = append(monitoringVersions, monitoringVersion)
Expand All @@ -1459,6 +1464,27 @@ func addMonitoring(ac *automationconfig.AutomationConfig, log *zap.SugaredLogger
ac.MonitoringVersions = monitoringVersions
}

// TLS param keys for monitoring additionalParams.
const (
tlsParamUseSsl = "useSslForAllConnections"
tlsParamTrustedCert = "sslTrustedServerCertificates"
tlsParamClientCert = "sslClientCertificate"
)

func addTLSParams(params map[string]string, caFilePath, pemKeyFile string) {
params[tlsParamUseSsl] = trueString
params[tlsParamTrustedCert] = caFilePath
if pemKeyFile != "" {
params[tlsParamClientCert] = pemKeyFile
}
}

func clearTLSParams(params map[string]string) {
delete(params, tlsParamUseSsl)
delete(params, tlsParamTrustedCert)
delete(params, tlsParamClientCert)
}

// registerAppDBHostsWithProject uses the Hosts API to add each process in the AppDB to the project
func (r *ReconcileAppDbReplicaSet) registerAppDBHostsWithProject(hostnames []string, conn om.Connection, opsManagerPassword string, log *zap.SugaredLogger) error {
getHostsResult, err := conn.GetHosts()
Expand Down
74 changes: 74 additions & 0 deletions controllers/operator/appdbreplicaset_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1457,3 +1457,77 @@ func createRunningAppDB(ctx context.Context, t *testing.T, startingMembers int,
assert.Equal(t, ok, res)
return reconciler
}

// TestClearTLSParams tests CLOUDP-351614 fix:
// When TLS is disabled on AppDB, TLS-specific params should be cleared from
// the monitoring config's additionalParams to prevent the monitoring agent
// from trying to use certificate files that no longer exist.
func TestClearTLSParams(t *testing.T) {
tests := []struct {
name string
input map[string]string
expectedOutput map[string]string
}{
{
name: "nil map",
input: nil,
expectedOutput: nil,
},
{
name: "empty map",
input: map[string]string{},
expectedOutput: map[string]string{},
},
{
name: "only TLS params",
input: map[string]string{
"useSslForAllConnections": "true",
"sslTrustedServerCertificates": "/some/path/ca.pem",
"sslClientCertificate": "/some/path/cert.pem",
},
expectedOutput: map[string]string{},
},
{
name: "mixed params - TLS and non-TLS",
input: map[string]string{
"useSslForAllConnections": "true",
"sslTrustedServerCertificates": "/some/path/ca.pem",
"sslClientCertificate": "/some/path/cert.pem",
"someOtherParam": "someValue",
"anotherParam": "anotherValue",
},
expectedOutput: map[string]string{
"someOtherParam": "someValue",
"anotherParam": "anotherValue",
},
},
{
name: "only non-TLS params",
input: map[string]string{
"someOtherParam": "someValue",
"anotherParam": "anotherValue",
},
expectedOutput: map[string]string{
"someOtherParam": "someValue",
"anotherParam": "anotherValue",
},
},
{
name: "partial TLS params",
input: map[string]string{
"useSslForAllConnections": "true",
"someOtherParam": "someValue",
},
expectedOutput: map[string]string{
"someOtherParam": "someValue",
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
clearTLSParams(tt.input)
assert.Equal(t, tt.expectedOutput, tt.input)
})
}
}
2 changes: 1 addition & 1 deletion controllers/operator/common_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,7 @@ func ReconcileReplicaSetAC(ctx context.Context, d om.Deployment, spec mdbv1.DbCo
}

d.MergeReplicaSet(rs, spec.GetAdditionalMongodConfig().ToMap(), lastMongodConfig, log)
d.AddMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath)
d.ConfigureMonitoringAndBackup(log, spec.GetSecurity().IsTLSEnabled(), caFilePath)
d.ConfigureTLS(spec.GetSecurity(), caFilePath)
d.ConfigureInternalClusterAuthentication(rs.GetProcessNames(), spec.GetSecurity().GetInternalClusterAuthenticationMode(), internalClusterPath)

Expand Down
2 changes: 1 addition & 1 deletion controllers/operator/mongodbshardedcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2006,7 +2006,7 @@ func (r *ShardedClusterReconcileHelper) publishDeployment(ctx context.Context, c
return err
}

d.AddMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath)
d.ConfigureMonitoringAndBackup(log, sc.Spec.GetSecurity().IsTLSEnabled(), opts.caFilePath)
d.ConfigureTLS(sc.Spec.GetSecurity(), opts.caFilePath)

setupInternalClusterAuth(d, sc.Name, sc.GetSecurity().GetInternalClusterAuthenticationMode(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1676,7 +1676,7 @@ func createDeploymentFromShardedCluster(t *testing.T, updatable v1.CustomResourc
Finalizing: false,
})
assert.NoError(t, err)
d.AddMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
d.ConfigureMonitoringAndBackup(zap.S(), sh.Spec.GetSecurity().IsTLSEnabled(), util.CAFilePathInContainer)
return d
}

Expand Down
Loading