diff --git a/pkg/test/extensions/global_node_detection.go b/pkg/test/extensions/global_node_detection.go new file mode 100644 index 000000000000..a83d38dc4c16 --- /dev/null +++ b/pkg/test/extensions/global_node_detection.go @@ -0,0 +1,254 @@ +package extensions + +import ( + "fmt" + "path/filepath" + "sort" + "strings" + + "github.com/openshift/origin/pkg/test/ginkgo/junitapi" +) + +// binaryKey uniquely identifies a test binary by its image and path. +type binaryKey struct { + imageTag string + binaryPath string +} + +// binaryResult holds the detection results for a single binary. +type binaryResult struct { + binaryName string // Just the filename, not the full path + globalLocations []string + totalTests int +} + +// imageResult groups all binary results for a single image. +type imageResult struct { + imageTag string + binaries []binaryResult +} + +// CheckForGlobalNodes checks if any code locations appear in ALL tests for each binary, +// which indicates that BeforeEach/AfterEach nodes were registered at the global level +// (root of the Ginkgo tree). This is a serious bug because these hooks run for EVERY test, +// wasting resources and time. In some cases, these global nodes can +// interfere with test operations. +// +// Tests are grouped by their source image, and each binary within an image is checked separately. +// +// Returns JUnit test cases as flakes (failing + passing with same name) for each image +// that has global nodes detected. This allows the issue to be tracked in CI without +// blocking the test run for now. +func CheckForGlobalNodes(specs ExtensionTestSpecs) []*junitapi.JUnitTestCase { + // Group tests by image tag and binary path + specsByBinary := make(map[binaryKey]ExtensionTestSpecs) + for _, spec := range specs { + if spec.Binary == nil { + continue + } + key := binaryKey{ + imageTag: spec.Binary.imageTag, + binaryPath: spec.Binary.binaryPath, + } + specsByBinary[key] = append(specsByBinary[key], spec) + } + + // Check each binary for global nodes, grouped by image + resultsByImage := make(map[string][]binaryResult) + + for key, binarySpecs := range specsByBinary { + // Skip binaries with fewer than 25 tests - can't detect global nodes meaningfully + // with small test counts as it would generate false positives + if len(binarySpecs) < 25 { + continue + } + + totalTests := len(binarySpecs) + + // Count how many unique tests contain each code location + locationToTests := make(map[string]map[string]struct{}) + for _, spec := range binarySpecs { + for _, loc := range spec.CodeLocations { + if locationToTests[loc] == nil { + locationToTests[loc] = make(map[string]struct{}) + } + locationToTests[loc][spec.Name] = struct{}{} + } + } + + // Find code locations that appear in ALL tests for this binary + var globalLocations []string + for loc, tests := range locationToTests { + if len(tests) != totalTests { + continue + } + + // Skip locations that are expected to be in all tests + if isExpectedGlobalLocation(loc) { + continue + } + + globalLocations = append(globalLocations, loc) + } + + if len(globalLocations) > 0 { + sort.Strings(globalLocations) + // Use just the binary filename, not the full extracted path + binaryName := filepath.Base(key.binaryPath) + resultsByImage[key.imageTag] = append(resultsByImage[key.imageTag], binaryResult{ + binaryName: binaryName, + globalLocations: globalLocations, + totalTests: totalTests, + }) + } + } + + if len(resultsByImage) == 0 { + return nil + } + + // Build sorted list of image results + var imageResults []imageResult + 
for imageTag, binaries := range resultsByImage { + // Sort binaries within each image for consistent output + sort.Slice(binaries, func(i, j int) bool { + return binaries[i].binaryName < binaries[j].binaryName + }) + imageResults = append(imageResults, imageResult{ + imageTag: imageTag, + binaries: binaries, + }) + } + + // Sort by imageTag for consistent output + sort.Slice(imageResults, func(i, j int) bool { + return imageResults[i].imageTag < imageResults[j].imageTag + }) + + // Create JUnit test cases as flakes (one failing, one passing per image) + var testCases []*junitapi.JUnitTestCase + + for _, result := range imageResults { + testName := fmt.Sprintf("[sig-ci] image %s should not have global BeforeEach/AfterEach nodes", result.imageTag) + + // Build detailed failure message + failureOutput := buildGlobalNodeFailureMessage(result.imageTag, result.binaries) + + // Count total global locations across all binaries + totalLocations := 0 + for _, b := range result.binaries { + totalLocations += len(b.globalLocations) + } + + // Create failing test case + failingCase := &junitapi.JUnitTestCase{ + Name: testName, + FailureOutput: &junitapi.FailureOutput{ + Message: fmt.Sprintf("Found %d global BeforeEach/AfterEach code locations in image %s across %d binary(ies)", totalLocations, result.imageTag, len(result.binaries)), + Output: failureOutput, + }, + } + testCases = append(testCases, failingCase) + + // Create passing test case (same name = flake) + passingCase := &junitapi.JUnitTestCase{ + Name: testName, + } + testCases = append(testCases, passingCase) + } + + return testCases +} + +// buildGlobalNodeFailureMessage creates a detailed message explaining the global node issue. +func buildGlobalNodeFailureMessage(imageTag string, binaries []binaryResult) string { + var sb strings.Builder + + sb.WriteString("\n") + sb.WriteString("╔══════════════════════════════════════════════════════════════════════════════╗\n") + sb.WriteString("║ GLOBAL BEFOREEACH/AFTEREACH DETECTED ║\n") + sb.WriteString("╚══════════════════════════════════════════════════════════════════════════════╝\n") + sb.WriteString("\n") + sb.WriteString(fmt.Sprintf("IMAGE: %s\n", imageTag)) + sb.WriteString("\n") + + for _, binary := range binaries { + sb.WriteString(fmt.Sprintf("BINARY: %s (%d tests)\n", binary.binaryName, binary.totalTests)) + sb.WriteString("GLOBAL CODE LOCATIONS:\n") + for _, loc := range binary.globalLocations { + sb.WriteString(fmt.Sprintf(" • %s\n", loc)) + } + sb.WriteString("\n") + } + + sb.WriteString("PROBLEM: The code locations above appear in ALL tests for their respective binaries,\n") + sb.WriteString("indicating that BeforeEach or AfterEach hooks were registered at the global level.\n") + sb.WriteString("\n") + sb.WriteString("This means these hooks run for EVERY SINGLE TEST even when not needed,\n") + sb.WriteString("wasting CI resources and adding unnecessary test execution time.\n") + sb.WriteString("\n") + sb.WriteString("COMMON CAUSES:\n") + sb.WriteString("\n") + sb.WriteString("1. Package-level FixturePath() call:\n") + sb.WriteString(" BAD: var myFixture = exutil.FixturePath(\"testdata\", \"file.yaml\")\n") + sb.WriteString(" GOOD: func myFixture() string { return exutil.FixturePath(\"testdata\", \"file.yaml\") }\n") + sb.WriteString("\n") + sb.WriteString("2. 
Package-level NewCLI/NewCLIWithoutNamespace call:\n") + sb.WriteString(" BAD: var oc = exutil.NewCLIWithoutNamespace(\"test\")\n") + sb.WriteString(" GOOD: Inside g.Describe(): oc := exutil.NewCLIWithoutNamespace(\"test\")\n") + sb.WriteString("\n") + sb.WriteString("HOW TO FIX:\n") + sb.WriteString("\n") + sb.WriteString("1. Find the file and line number from the locations above\n") + sb.WriteString("2. Look for package-level var declarations that call FixturePath() or NewCLI*()\n") + sb.WriteString("3. Convert them to functions (for FixturePath) or move inside Describe blocks (for CLI)\n") + sb.WriteString("\n") + sb.WriteString("EXAMPLE FIX FOR FixturePath:\n") + sb.WriteString("\n") + sb.WriteString(" // BEFORE (causes global BeforeEach):\n") + sb.WriteString(" var myFixture = exutil.FixturePath(\"testdata\", \"file.yaml\")\n") + sb.WriteString(" func useFixture() { oc.Run(\"apply\").Args(\"-f\", myFixture) }\n") + sb.WriteString("\n") + sb.WriteString(" // AFTER (lazy evaluation, no global BeforeEach):\n") + sb.WriteString(" func myFixture() string { return exutil.FixturePath(\"testdata\", \"file.yaml\") }\n") + sb.WriteString(" func useFixture() { oc.Run(\"apply\").Args(\"-f\", myFixture()) }\n") + sb.WriteString("\n") + sb.WriteString("FOR AI ASSISTANTS:\n") + sb.WriteString("If you're Claude or another AI helping fix this, search the codebase for the file\n") + sb.WriteString("paths shown above. Look for package-level 'var' declarations that call\n") + sb.WriteString("exutil.FixturePath() or exutil.NewCLI*(). Convert FixturePath vars to functions\n") + sb.WriteString("that return the path. Move NewCLI* calls inside Ginkgo Describe/Context blocks.\n") + sb.WriteString("\n") + + return sb.String() +} + +// allowedGlobalLocations is an exception list of code locations that are known +// to register global BeforeEach/AfterEach nodes. These should be fixed +// unless it is intentional. +var allowedGlobalLocations = map[string]string{ + // Add exceptions here with an explanation, e.g.: + // "github.com/openshift/origin/test/extended/networking/util.go:82": "FIXME: package-level FixturePath call", +} + +// isExpectedGlobalLocation returns true for code locations that are expected +// to appear in all tests and should not trigger the global node detection. 
+func isExpectedGlobalLocation(loc string) bool { + // Check exact match in allowlist + if _, ok := allowedGlobalLocations[loc]; ok { + return true + } + + // Check pattern matches for framework infrastructure that's legitimately global + expectedPatterns := []string{ + // None currently - if we find legitimate cases, add them here with comments + } + + for _, pattern := range expectedPatterns { + if strings.Contains(loc, pattern) { + return true + } + } + + return false +} diff --git a/pkg/test/ginkgo/cmd_runsuite.go b/pkg/test/ginkgo/cmd_runsuite.go index df1b73b0f641..5bf52aaccb85 100644 --- a/pkg/test/ginkgo/cmd_runsuite.go +++ b/pkg/test/ginkgo/cmd_runsuite.go @@ -220,6 +220,10 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc logrus.Infof("Discovered %d total tests", len(specs)) + // Check for global BeforeEach/AfterEach nodes in each binary + // Returns JUnit test cases as flakes to track the issue without blocking + globalNodeTestCases := extensions.CheckForGlobalNodes(specs) + // skip tests due to newer k8s restConfig, err := clusterinfo.GetMonitorRESTConfig() if err != nil { @@ -618,6 +622,7 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc var syntheticTestResults []*junitapi.JUnitTestCase var syntheticFailure bool syntheticTestResults = append(syntheticTestResults, stableClusterTestResults...) + syntheticTestResults = append(syntheticTestResults, globalNodeTestCases...) timeSuffix := fmt.Sprintf("_%s", start.UTC().Format("20060102-150405")) diff --git a/test/extended/image_ecosystem/mysql_replica.go b/test/extended/image_ecosystem/mysql_replica.go index 42a635388de5..2919a574c399 100644 --- a/test/extended/image_ecosystem/mysql_replica.go +++ b/test/extended/image_ecosystem/mysql_replica.go @@ -34,10 +34,13 @@ var ( false, }, } - helperTemplate = exutil.FixturePath("..", "..", "examples", "db-templates", "mysql-ephemeral-template.json") - helperName = "mysql-helper" + helperName = "mysql-helper" ) +func helperTemplate() string { + return exutil.FixturePath("..", "..", "examples", "db-templates", "mysql-ephemeral-template.json") +} + // CreateMySQLReplicationHelpers creates a set of MySQL helpers for master, // slave and an extra helper that is used for remote login test. 
func CreateMySQLReplicationHelpers(c kcoreclient.PodInterface, masterDeployment, slaveDeployment, helperDeployment string, slaveCount int) (exutil.Database, []exutil.Database, exutil.Database) { @@ -82,7 +85,7 @@ func replicationTestFactory(oc *exutil.CLI, tc testCase, cleanup func()) func() err = oc.Run("new-app").Args("--template", tc.TemplateName).Execute() o.Expect(err).NotTo(o.HaveOccurred()) - err = oc.Run("new-app").Args("-f", helperTemplate, "-p", fmt.Sprintf("MYSQL_VERSION=%s", tc.Version), "-p", fmt.Sprintf("DATABASE_SERVICE_NAME=%s", helperName)).Execute() + err = oc.Run("new-app").Args("-f", helperTemplate(), "-p", fmt.Sprintf("MYSQL_VERSION=%s", tc.Version), "-p", fmt.Sprintf("DATABASE_SERVICE_NAME=%s", helperName)).Execute() o.Expect(err).NotTo(o.HaveOccurred()) // oc.KubeFramework().WaitForAnEndpoint currently will wait forever; for now, prefacing with our WaitForADeploymentToComplete, diff --git a/test/extended/image_ecosystem/postgresql_replica.go b/test/extended/image_ecosystem/postgresql_replica.go index 6d0a5294d8cd..76696a2749d5 100644 --- a/test/extended/image_ecosystem/postgresql_replica.go +++ b/test/extended/image_ecosystem/postgresql_replica.go @@ -20,13 +20,16 @@ import ( var ( postgreSQLReplicationTemplate = "https://raw.githubusercontent.com/sclorg/postgresql-container/master/examples/replica/postgresql_replica.json" - postgreSQLEphemeralTemplate = exutil.FixturePath("..", "..", "examples", "db-templates", "postgresql-ephemeral-template.json") postgreSQLHelperName = "postgresql-helper" postgreSQLImages = []string{ "postgresql:9.6", } ) +func postgreSQLEphemeralTemplate() string { + return exutil.FixturePath("..", "..", "examples", "db-templates", "postgresql-ephemeral-template.json") +} + /* var _ = g.Describe("[sig-devex][Feature:ImageEcosystem][postgresql][Slow][Local] openshift postgresql replication", func() { defer g.GinkgoRecover() @@ -130,7 +133,7 @@ func PostgreSQLReplicationTestFactory(oc *exutil.CLI, image string, cleanup func err = oc.Run("new-app").Args("--template", "pg-replica-example", "-p", fmt.Sprintf("IMAGESTREAMTAG=%s", image)).Execute() o.Expect(err).NotTo(o.HaveOccurred()) - err = oc.Run("new-app").Args("-f", postgreSQLEphemeralTemplate, "-p", fmt.Sprintf("DATABASE_SERVICE_NAME=%s", postgreSQLHelperName)).Execute() + err = oc.Run("new-app").Args("-f", postgreSQLEphemeralTemplate(), "-p", fmt.Sprintf("DATABASE_SERVICE_NAME=%s", postgreSQLHelperName)).Execute() o.Expect(err).NotTo(o.HaveOccurred()) g.By("PV/PVC dump after setup") diff --git a/test/extended/kernel/common.go b/test/extended/kernel/common.go index 96ed3b6b737d..0cef82c4c572 100644 --- a/test/extended/kernel/common.go +++ b/test/extended/kernel/common.go @@ -23,12 +23,18 @@ var ( // - 5.14.0-430.el9.x86_64+rt // Continue using regex to tighten the match for both versions realTimeKernelRE = regexp.MustCompile(".*[.+]rt.*") - rtEnvFixture = exutil.FixturePath("testdata", "kernel", "rt-tests-environment.yaml") - rtPodFixture = exutil.FixturePath("testdata", "kernel", "rt-tests-pod.yaml") rtNamespace = "ci-realtime-testbed" rtPodName = "rt-tests" ) +func rtEnvFixture() string { + return exutil.FixturePath("testdata", "kernel", "rt-tests-environment.yaml") +} + +func rtPodFixture() string { + return exutil.FixturePath("testdata", "kernel", "rt-tests-pod.yaml") +} + func failIfNotRT(oc *exutil.CLI) { g.By("checking kernel configuration") @@ -57,14 +63,14 @@ func getRealTimeWorkerNodes(oc *exutil.CLI) (nodes map[string]int, err error) { // Setup the cluster infra needed for running RT 
tests func configureRealtimeTestEnvironment(oc *exutil.CLI) { g.By("Setting up the privileged environment needed for realtime tests") - err := oc.SetNamespace(rtNamespace).Run("apply").Args("-f", rtEnvFixture).Execute() + err := oc.SetNamespace(rtNamespace).Run("apply").Args("-f", rtEnvFixture()).Execute() o.Expect(err).NotTo(o.HaveOccurred(), "unable to create namespace and service accounts for rt-tests") } // Tear down the infra setup we used for testing func cleanupRealtimeTestEnvironment(oc *exutil.CLI) { g.By("Cleaning up the privileged environment needed for realtime tests") - err := oc.SetNamespace(rtNamespace).Run("delete").Args("-f", rtEnvFixture).Execute() + err := oc.SetNamespace(rtNamespace).Run("delete").Args("-f", rtEnvFixture()).Execute() o.Expect(err).NotTo(o.HaveOccurred(), "unable to clean up the namespace and service accounts for rt-tests") err = wait.PollImmediate(1*time.Second, 60*time.Second, func() (bool, error) { @@ -82,7 +88,7 @@ func cleanupRealtimeTestEnvironment(oc *exutil.CLI) { // Setup the pod that will be used to run the test func startRtTestPod(oc *exutil.CLI) { g.By("Setting up the pod needed for realtime tests") - err := oc.SetNamespace(rtNamespace).Run("apply").Args("-f", rtPodFixture).Execute() + err := oc.SetNamespace(rtNamespace).Run("apply").Args("-f", rtPodFixture()).Execute() o.Expect(err).NotTo(o.HaveOccurred(), "unable to create test pod for rt-tests") // Wait for the container to be ready to go @@ -93,7 +99,7 @@ func startRtTestPod(oc *exutil.CLI) { // Cleanup the pod used for the test func cleanupRtTestPod(oc *exutil.CLI) { g.By("Cleaning up the pod needed for realtime tests") - err := oc.SetNamespace(rtNamespace).Run("delete").Args("-f", rtPodFixture).Execute() + err := oc.SetNamespace(rtNamespace).Run("delete").Args("-f", rtPodFixture()).Execute() o.Expect(err).NotTo(o.HaveOccurred(), "unable to clean up test pod for rt-tests") // Wait for the container to be ready to go diff --git a/test/extended/networking/ipsec.go b/test/extended/networking/ipsec.go index e71d21638d33..5c0ee83681b2 100644 --- a/test/extended/networking/ipsec.go +++ b/test/extended/networking/ipsec.go @@ -505,7 +505,7 @@ var _ = g.Describe("[sig-network][Feature:IPsec]", g.Ordered, func() { // nodes don't go for a reboot while rolling out `99-worker-north-south-ipsec-config` // machine config which configures certificates for testing IPsec north south traffic. 
g.By("deploy machine configuration policy") - err = oc.AsAdmin().Run("apply").Args("-f", nsNodeRebootNoneFixture).Execute() + err = oc.AsAdmin().Run("apply").Args("-f", nsNodeRebootNoneFixture()).Execute() o.Expect(err).NotTo(o.HaveOccurred()) mg.WaitForBootImageControllerToComplete(oc) diff --git a/test/extended/networking/util.go b/test/extended/networking/util.go index ec1bc5ff59a8..ea9ed1f542ba 100644 --- a/test/extended/networking/util.go +++ b/test/extended/networking/util.go @@ -79,11 +79,20 @@ const ( var ( masterRoleMachineConfigLabel = map[string]string{"machineconfiguration.openshift.io/role": "master"} workerRoleMachineConfigLabel = map[string]string{"machineconfiguration.openshift.io/role": "worker"} - ipsecConfigurationBaseDir = exutil.FixturePath("testdata", "ipsec") - nsMachineConfigFixture = filepath.Join(ipsecConfigurationBaseDir, "nsconfig-machine-config.yaml") - nsNodeRebootNoneFixture = filepath.Join(ipsecConfigurationBaseDir, "nsconfig-reboot-none-policy.yaml") ) +func ipsecConfigurationBaseDir() string { + return exutil.FixturePath("testdata", "ipsec") +} + +func nsMachineConfigFixture() string { + return filepath.Join(ipsecConfigurationBaseDir(), "nsconfig-machine-config.yaml") +} + +func nsNodeRebootNoneFixture() string { + return filepath.Join(ipsecConfigurationBaseDir(), "nsconfig-reboot-none-policy.yaml") +} + // IsIPv6 returns true if a group of ips are ipv6. func isIpv6(ip []string) bool { ipv6 := false @@ -648,7 +657,7 @@ func createIPsecCertsMachineConfig(oc *exutil.CLI) (*mcfgv1.MachineConfig, error if err == nil { return nsCertMachineConfig, nil } - err = oc.AsAdmin().Run("create").Args("-f", nsMachineConfigFixture).Execute() + err = oc.AsAdmin().Run("create").Args("-f", nsMachineConfigFixture()).Execute() if err != nil { return nil, fmt.Errorf("error deploying IPsec certs Machine Config: %v", err) }