From 18f555e700ecdef33494f1310cd138310cae9610 Mon Sep 17 00:00:00 2001 From: Hongkai Liu Date: Fri, 19 Dec 2025 13:00:11 +0800 Subject: [PATCH] CO must go Progressing during a minor-level upgrade Follow up [1]: in CI tests there are many patch-level upgrade we made between the same payload manifests with a renamed version. In those cases, it is OK that CO does not report Progressing. Ideally, CI should enforce Progressing only when there are changes. Before there is a practical way to achieve it, we focus on minor upgrades only. [1]. https://issues.redhat.com/browse/OCPSTRAT-2484?focusedId=28681908&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-28681908 --- .../legacycvomonitortests/monitortest.go | 2 +- .../legacycvomonitortests/operators.go | 43 +++++++++++- .../legacycvomonitortests/operators_test.go | 67 +++++++++++++++++++ 3 files changed, 110 insertions(+), 2 deletions(-) diff --git a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go index 0f1b838d2852..9f343f64f43a 100644 --- a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go +++ b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/monitortest.go @@ -91,7 +91,7 @@ func (w *legacyMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.C isUpgrade := platformidentification.DidUpgradeHappenDuringCollection(finalIntervals, time.Time{}, time.Time{}) if isUpgrade { junits = append(junits, testUpgradeOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...) - junits = append(junits, testUpgradeOperatorProgressingStateTransitions(finalIntervals)...) + junits = append(junits, testUpgradeOperatorProgressingStateTransitions(finalIntervals, w.adminRESTConfig)...) } else { junits = append(junits, testStableSystemOperatorStateTransitions(finalIntervals, w.adminRESTConfig)...) } diff --git a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go index 8c1af2645f70..3c56470637a0 100644 --- a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go +++ b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go @@ -6,6 +6,7 @@ import ( "strings" "time" + "github.com/blang/semver/v4" configv1 "github.com/openshift/api/config/v1" clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1" "github.com/sirupsen/logrus" @@ -641,7 +642,7 @@ func testOperatorStateTransitions(events monitorapi.Intervals, conditionTypes [] return ret } -func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals) []*junitapi.JUnitTestCase { +func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals, clientConfig *rest.Config) []*junitapi.JUnitTestCase { var ret []*junitapi.JUnitTestCase upgradeWindows := getUpgradeWindows(events) multiUpgrades := platformidentification.UpgradeNumberDuringCollection(events, time.Time{}, time.Time{}) > 1 @@ -720,6 +721,11 @@ func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals) return "" } + patch, err := patchUpgrade(clientConfig) + if err != nil { + logrus.Warnf("Error checking the upgrade level: %v", err) + } + // Each cluster operator must report Progressing=True during cluster upgrade for _, operatorName := range platformidentification.KnownOperators.List() { bzComponent := platformidentification.GetBugzillaComponentForOperator(operatorName) @@ -733,6 +739,10 @@ func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals) mcTestCase.SkipMessage = &junitapi.SkipMessage{ Message: "Test skipped in a multi-upgrade test", } + } else if patch { + mcTestCase.SkipMessage = &junitapi.SkipMessage{ + Message: "Test skipped in a patch-level upgrade test", + } } else if t, ok := coProgressingStart[operatorName]; !ok || t.IsZero() { output := fmt.Sprintf("clusteroperator/%s was never Progressing=True during the upgrade window from %s to %s", operatorName, start.Format(time.RFC3339), stop.Format(time.RFC3339)) exception = except(operatorName, "") @@ -917,6 +927,37 @@ func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals) return ret } +func patchUpgrade(config *rest.Config) (bool, error) { + configClient, err := clientconfigv1.NewForConfig(config) + if err != nil { + return false, err + } + return patchUpgradeWithConfigClient(configClient) +} +func patchUpgradeWithConfigClient(c clientconfigv1.ConfigV1Interface) (bool, error) { + cv, err := c.ClusterVersions().Get(context.Background(), "version", metav1.GetOptions{}) + if err != nil { + return false, err + } + history := cv.Status.History + if l := len(history); l < 2 { + return false, fmt.Errorf("not long enough (>1) history for versions in ClusterVersion/version for upgrade, found %d", l) + } + + target := history[0].Version + previous := history[1].Version + + targetVersion, err := semver.Parse(target) + if err != nil { + return false, err + } + previousVersion, err := semver.Parse(previous) + if err != nil { + return false, err + } + return previousVersion.Major == targetVersion.Major && previousVersion.Minor == targetVersion.Minor, nil +} + func fromAndTo(intervals monitorapi.Intervals) (time.Time, time.Time) { var from, to time.Time for _, interval := range intervals { diff --git a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators_test.go b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators_test.go index 6dc7a7e3d65c..6289713c685a 100644 --- a/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators_test.go +++ b/pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators_test.go @@ -5,6 +5,7 @@ import ( "time" configv1 "github.com/openshift/api/config/v1" + "github.com/openshift/client-go/config/clientset/versioned/fake" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" @@ -295,3 +296,69 @@ func Test_updateCOWaiting(t *testing.T) { }) } } + +func Test_patchUpgradeWithConfigClient(t *testing.T) { + tests := []struct { + name string + cv *configv1.ClusterVersion + expect bool + expectErrMsg string + }{ + { + name: "nil", + cv: &configv1.ClusterVersion{}, + expectErrMsg: "clusterversions.config.openshift.io \"version\" not found", + }, + { + name: "no history", + cv: &configv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{Name: "version"}, + }, + expectErrMsg: "not long enough (>1) history for versions in ClusterVersion/version for upgrade, found 0", + }, + { + name: "minor", + cv: &configv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{Name: "version"}, + Status: configv1.ClusterVersionStatus{ + History: []configv1.UpdateHistory{ + { + Version: "4.11.0", + }, + { + Version: "4.12.0", + }, + }, + }, + }, + }, + { + name: "minor", + cv: &configv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{Name: "version"}, + Status: configv1.ClusterVersionStatus{ + History: []configv1.UpdateHistory{ + { + Version: "4.11.1", + }, + { + Version: "4.11.0", + }, + }, + }, + }, + expect: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual, actualErr := patchUpgradeWithConfigClient(fake.NewClientset(tt.cv).ConfigV1()) + if tt.expectErrMsg != "" { + assert.EqualError(t, actualErr, tt.expectErrMsg) + } else { + assert.Nil(t, actualErr) + } + assert.Equal(t, tt.expect, actual) + }) + } +}