From 7657bcc822636b238ea2145d21ccf7ee01663d22 Mon Sep 17 00:00:00 2001 From: xueqzhan Date: Wed, 29 Oct 2025 09:01:56 -0400 Subject: [PATCH] Revert "Merge pull request #30296 from hongkailiu/OTA-1637-reboot" This reverts commit e603cd9c33b2c1dcf529bb19c2b87c52e4a98c65, reversing changes made to b60bbfe4f1d983a2447ddae4e340b067b5641e30. --- .../legacycvomonitortests/monitortest.go | 1 - .../legacycvomonitortests/operators.go | 210 +----------------- 2 files changed, 10 insertions(+), 201 deletions(-) diff --git a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go index 1c9135e4955f..b07977b0a376 100644 --- a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go +++ b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go @@ -45,7 +45,6 @@ func (w *legacyMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.C isUpgrade := platformidentification.DidUpgradeHappenDuringCollection(finalIntervals, time.Time{}, time.Time{}) if isUpgrade { junits = append(junits, testUpgradeOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...) - junits = append(junits, clusterOperatorIsNotProgressingWhenMachineConfigIs(finalIntervals)...) } else { junits = append(junits, testStableSystemOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...) } diff --git a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go index fa99ed61b5c7..ce019a78976d 100644 --- a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go +++ b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go @@ -6,21 +6,21 @@ import ( "strings" "time" - configv1 "github.com/openshift/api/config/v1" - clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" - "github.com/sirupsen/logrus" + "github.com/openshift/origin/pkg/monitortestlibrary/utility" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" + "github.com/openshift/origin/pkg/monitortests/clusterversionoperator/operatorstateanalyzer" + "github.com/sirupsen/logrus" + + configv1 "github.com/openshift/api/config/v1" + clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" "github.com/openshift/origin/pkg/monitor/monitorapi" "github.com/openshift/origin/pkg/monitortestlibrary/platformidentification" platformidentification2 "github.com/openshift/origin/pkg/monitortestlibrary/platformidentification" - "github.com/openshift/origin/pkg/monitortestlibrary/utility" - "github.com/openshift/origin/pkg/monitortests/clusterversionoperator/operatorstateanalyzer" "github.com/openshift/origin/pkg/test/ginkgo/junitapi" exutil "github.com/openshift/origin/test/extended/util" + "k8s.io/client-go/rest" ) // exceptionCallback consumes a suspicious condition and returns an @@ -516,6 +516,9 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes [] for _, conditionType := range conditionTypes { for _, operatorName := range platformidentification.KnownOperators.List() { bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName) + if bzComponent == "Unknown" { + bzComponent = operatorName + } testName := fmt.Sprintf("[bz-%v] clusteroperator/%v should not change condition/%v", bzComponent, operatorName, conditionType) operatorEvents := eventsByOperator[operatorName] if len(operatorEvents) == 0 { @@ -583,9 +586,6 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes [] } if len(fatal) > 0 || len(excepted) > 0 { - // add a failure so we - // either flake (or pass) in case len(fatal) == 0 by adding a success to the same test - // or fail in case len(fatal) > 0 by leaving the failure as the only output for the test ret = append(ret, &junitapi.JUnitTestCase{ Name: testName, Duration: duration, @@ -597,197 +597,7 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes [] } if len(fatal) == 0 { - if len(excepted) > 0 { - // add a success so we flake (or pass) and don't fail - ret = append(ret, &junitapi.JUnitTestCase{Name: testName, SystemOut: "Passing the case to make the overall test case flake as the previous failure is expected"}) - } else { - ret = append(ret, &junitapi.JUnitTestCase{Name: testName}) - } - } - } - } - - return ret -} - -func clusterOperatorIsNotProgressingWhenMachineConfigIs(events monitorapi.Intervals) []*junitapi.JUnitTestCase { - var ret []*junitapi.JUnitTestCase - upgradeWindows := getUpgradeWindows(events) - - var machineConfigProgressingStart time.Time - var eventsInUpgradeWindows monitorapi.Intervals - - var start, stop time.Time - for _, event := range events { - if !isInUpgradeWindow(upgradeWindows, event) { - continue - } - eventsInUpgradeWindows = append(eventsInUpgradeWindows, event) - if start.IsZero() || event.From.Before(start) { - start = event.From - } - if stop.IsZero() || event.To.After(stop) { - stop = event.To - } - } - duration := stop.Sub(start).Seconds() - - eventsByOperator := getEventsByOperator(eventsInUpgradeWindows) - for _, mcEvent := range eventsByOperator["machine-config"] { - condition := monitorapi.GetOperatorConditionStatus(mcEvent) - if condition == nil { - continue // ignore non-condition intervals - } - if condition.Type == configv1.OperatorProgressing && condition.Status == configv1.ConditionTrue { - machineConfigProgressingStart = mcEvent.To - break - } - } - - mcTestCase := &junitapi.JUnitTestCase{ - Name: fmt.Sprintf("[bz-Machine Config Operator] clusteroperator/machine-config must go Progressing=True during an upgrade test"), - Duration: duration, - } - if machineConfigProgressingStart.IsZero() { - mcTestCase.FailureOutput = &junitapi.FailureOutput{ - Output: fmt.Sprintf("machine-config was never Progressing=True during the upgrade window from %s to %s", start.Format(time.RFC3339), stop.Format(time.RFC3339)), - } - return []*junitapi.JUnitTestCase{mcTestCase} - } else { - mcTestCase.SystemOut = fmt.Sprintf("machine-config became Progressing=True at %s during the upgrade window from %s to %s", machineConfigProgressingStart.Format(time.RFC3339), start.Format(time.RFC3339), stop.Format(time.RFC3339)) - } - ret = append(ret, mcTestCase) - - for _, operatorName := range platformidentification.KnownOperators.Difference(sets.NewString("machine-config")).List() { - bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName) - testName := fmt.Sprintf("[bz-%v] clusteroperator/%v should stay Progressing=False while MCO is Progressing=True", bzComponent, operatorName) - operatorEvents := eventsByOperator[operatorName] - if len(operatorEvents) == 0 { - ret = append(ret, &junitapi.JUnitTestCase{ - Name: testName, - Duration: duration, - }) - continue - } - - except := func(co string, reason string) string { - switch co { - case "csi-snapshot-controller": - if reason == "CSISnapshotController_Deploying" { - return "https://issues.redhat.com/browse/OCPBUGS-62624" - } - case "dns": - if reason == "DNSReportsProgressingIsTrue" { - return "https://issues.redhat.com/browse/OCPBUGS-62623" - } - case "image-registry": - if reason == "NodeCADaemonUnavailable::Ready" || reason == "DeploymentNotCompleted" { - return "https://issues.redhat.com/browse/OCPBUGS-62626" - } - case "ingress": - if reason == "Reconciling" { - return "https://issues.redhat.com/browse/OCPBUGS-62627" - } - case "kube-storage-version-migrator": - if reason == "KubeStorageVersionMigrator_Deploying" { - return "https://issues.redhat.com/browse/OCPBUGS-62629" - } - case "network": - if reason == "Deploying" { - return "https://issues.redhat.com/browse/OCPBUGS-62630" - } - case "node-tuning": - if reason == "Reconciling" { - return "https://issues.redhat.com/browse/OCPBUGS-62632" - } - case "openshift-controller-manager": - if reason == "_DesiredStateNotYetAchieved" { - return "https://issues.redhat.com/browse/OCPBUGS-63116" - } - case "service-ca": - if reason == "_ManagedDeploymentsAvailable" { - return "https://issues.redhat.com/browse/OCPBUGS-62633" - } - case "storage": - // GCPPDCSIDriverOperatorCR_GCPPDDriverControllerServiceController_Deploying - // GCPPDCSIDriverOperatorCR_GCPPDDriverNodeServiceController_Deploying - // AWSEBSCSIDriverOperatorCR_AWSEBSDriverNodeServiceController_Deploying - // VolumeDataSourceValidatorDeploymentController_Deploying - if strings.HasSuffix(reason, "Controller_Deploying") || - reason == "GCPPD_Deploying" { - return "https://issues.redhat.com/browse/OCPBUGS-62634" - } - case "olm": - // CatalogdDeploymentCatalogdControllerManager_Deploying - // OperatorcontrollerDeploymentOperatorControllerControllerManager_Deploying - if strings.HasSuffix(reason, "ControllerManager_Deploying") { - return "https://issues.redhat.com/browse/OCPBUGS-62635" - } - } - return "" - } - - var excepted, fatal []string - for _, operatorEvent := range operatorEvents { - if operatorEvent.From.Before(machineConfigProgressingStart) { - continue - } - condition := monitorapi.GetOperatorConditionStatus(operatorEvent) - if condition == nil { - continue // ignore non-condition intervals - } - if condition.Type == "" { - fatal = append(fatal, fmt.Sprintf("failed to convert %v into a condition with a type", operatorEvent)) - continue - } - - if condition.Type != configv1.OperatorProgressing || condition.Status == configv1.ConditionFalse { - continue - } - - // if there was any switch, it was wrong/unexpected at some point - failure := fmt.Sprintf("%v", operatorEvent) - - exception := except(operatorName, condition.Reason) - if exception == "" { - fatal = append(fatal, failure) - } else { - excepted = append(excepted, fmt.Sprintf("%s (exception: %s)", failure, exception)) - } - } - - output := fmt.Sprintf("%d (out of %d) unexpected clusteroperator state transitions while machine-config is progressing during the upgrade window from %s to %s", len(fatal), len(operatorEvents), start.Format(time.RFC3339), stop.Format(time.RFC3339)) - if len(fatal) > 0 { - output = fmt.Sprintf("%s. These did not match any known exceptions, so they cause this test-case to fail:\n\n%v\n", output, strings.Join(fatal, "\n")) - } else { - output = fmt.Sprintf("%s, as desired.", output) - } - output = fmt.Sprintf("%s\n%d unwelcome but acceptable clusteroperator state transitions while machine-config is progressing during the upgrade window from %s to %s", output, len(excepted), start.Format(time.RFC3339), stop.Format(time.RFC3339)) - if len(excepted) > 0 { - output = fmt.Sprintf("%s. These should not happen, but because they are tied to exceptions, the fact that they did happen is not sufficient to cause this test-case to fail:\n\n%v\n", output, strings.Join(excepted, "\n")) - } else { - output = fmt.Sprintf("%s, as desired.", output) - } - - if len(fatal) > 0 || len(excepted) > 0 { - // add a failure so we - // either flake (or pass) in case len(fatal) == 0 by adding a success to the same test - // or fail in case len(fatal) > 0 by leaving the failure as the only output for the test - ret = append(ret, &junitapi.JUnitTestCase{ - Name: testName, - Duration: duration, - SystemOut: output, - FailureOutput: &junitapi.FailureOutput{ - Output: output, - }, - }) - } - - if len(fatal) == 0 { - if len(excepted) > 0 { // add a success so we flake (or pass) and don't fail - ret = append(ret, &junitapi.JUnitTestCase{Name: testName, SystemOut: "Passing the case to make the overall test case flake as the previous failure is expected"}) - } else { ret = append(ret, &junitapi.JUnitTestCase{Name: testName}) } }