Skip to content

Commit ca5ffed

Browse files
stbenjamclaude
andcommitted
fix(test): prevent global BeforeEach/AfterEach registration and add detection
When exutil.FixturePath() or exutil.NewCLI*() is called at package level (in a var declaration outside any function), it registers BeforeEach/AfterEach hooks at the root of the Ginkgo tree. This causes these hooks to run for EVERY SINGLE TEST in the suite, even when not needed, wasting CI resources and adding unnecessary test execution time. This PR fixes the remaining cases of package-level FixturePath() calls by converting them to lazy functions: - kernel/common.go: rtEnvFixture, rtPodFixture - postgresql_replica.go: postgreSQLEphemeralTemplate - mysql_replica.go: helperTemplate - networking/util.go: ipsecConfigurationBaseDir, nsMachineConfigFixture, nsNodeRebootNoneFixture Additionally, this PR adds runtime detection that creates flaky JUnit test cases when global nodes are detected. This allows CI to track future violations without blocking test runs. The detection: - Groups tests by image and binary - Identifies code locations appearing in 100% of tests for each binary - Reports violations as [sig-ci] flaky tests with detailed fix instructions - Skips binaries with <25 tests to avoid false positives - Supports an allowlist for temporary exceptions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 0ee7ca3 commit ca5ffed

File tree

7 files changed

+268
-16
lines changed

7 files changed

+268
-16
lines changed
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
package extensions
2+
3+
import (
4+
"fmt"
5+
"sort"
6+
"strings"
7+
8+
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
9+
)
10+
11+
// binaryKey uniquely identifies a test binary by its image and path.
12+
type binaryKey struct {
13+
imageTag string
14+
binaryPath string
15+
}
16+
17+
// CheckForGlobalNodes checks if any code locations appear in ALL tests for each binary,
18+
// which indicates that BeforeEach/AfterEach nodes were registered at the global level
19+
// (root of the Ginkgo tree). This is a serious bug because these hooks run for EVERY test,
20+
// wasting resources and time. In some cases, these global nodes can
21+
// interfere with test operations.
22+
//
23+
// Tests are grouped by their source image and binary path, and each group is checked separately.
24+
//
25+
// Returns JUnit test cases as flakes (failing + passing with same name) for each binary
26+
// that has global nodes detected. This allows the issue to be tracked in CI without
27+
// blocking the test run for now.
28+
func CheckForGlobalNodes(specs ExtensionTestSpecs) []*junitapi.JUnitTestCase {
29+
// Group tests by image tag and binary path
30+
specsByBinary := make(map[binaryKey]ExtensionTestSpecs)
31+
for _, spec := range specs {
32+
if spec.Binary == nil {
33+
continue
34+
}
35+
key := binaryKey{
36+
imageTag: spec.Binary.imageTag,
37+
binaryPath: spec.Binary.binaryPath,
38+
}
39+
specsByBinary[key] = append(specsByBinary[key], spec)
40+
}
41+
42+
// Check each binary for global nodes
43+
type binaryResult struct {
44+
imageTag string
45+
binaryPath string
46+
globalLocations []string
47+
totalTests int
48+
}
49+
var results []binaryResult
50+
51+
for key, binarySpecs := range specsByBinary {
52+
// Skip binaries with fewer than 25 tests - can't detect global nodes meaningfully
53+
// with small test counts as it would generate false positives
54+
if len(binarySpecs) < 25 {
55+
continue
56+
}
57+
58+
totalTests := len(binarySpecs)
59+
60+
// Count how many unique tests contain each code location
61+
locationToTests := make(map[string]map[string]struct{})
62+
for _, spec := range binarySpecs {
63+
for _, loc := range spec.CodeLocations {
64+
if locationToTests[loc] == nil {
65+
locationToTests[loc] = make(map[string]struct{})
66+
}
67+
locationToTests[loc][spec.Name] = struct{}{}
68+
}
69+
}
70+
71+
// Find code locations that appear in ALL tests for this binary
72+
var globalLocations []string
73+
for loc, tests := range locationToTests {
74+
if len(tests) != totalTests {
75+
continue
76+
}
77+
78+
// Skip locations that are expected to be in all tests
79+
if isExpectedGlobalLocation(loc) {
80+
continue
81+
}
82+
83+
globalLocations = append(globalLocations, loc)
84+
}
85+
86+
if len(globalLocations) > 0 {
87+
sort.Strings(globalLocations)
88+
results = append(results, binaryResult{
89+
imageTag: key.imageTag,
90+
binaryPath: key.binaryPath,
91+
globalLocations: globalLocations,
92+
totalTests: totalTests,
93+
})
94+
}
95+
}
96+
97+
if len(results) == 0 {
98+
return nil
99+
}
100+
101+
// Sort results by imageTag then binaryPath for consistent output
102+
sort.Slice(results, func(i, j int) bool {
103+
if results[i].imageTag != results[j].imageTag {
104+
return results[i].imageTag < results[j].imageTag
105+
}
106+
return results[i].binaryPath < results[j].binaryPath
107+
})
108+
109+
// Create JUnit test cases as flakes (one failing, one passing per binary)
110+
var testCases []*junitapi.JUnitTestCase
111+
112+
for _, result := range results {
113+
testName := fmt.Sprintf("[sig-ci] image %s binary %s should not have global BeforeEach/AfterEach nodes", result.imageTag, result.binaryPath)
114+
115+
// Build detailed failure message
116+
failureOutput := buildGlobalNodeFailureMessage(result.imageTag, result.binaryPath, result.globalLocations, result.totalTests)
117+
118+
// Create failing test case
119+
failingCase := &junitapi.JUnitTestCase{
120+
Name: testName,
121+
FailureOutput: &junitapi.FailureOutput{
122+
Message: fmt.Sprintf("Found %d global BeforeEach/AfterEach code locations in image %s binary %s", len(result.globalLocations), result.imageTag, result.binaryPath),
123+
Output: failureOutput,
124+
},
125+
}
126+
testCases = append(testCases, failingCase)
127+
128+
// Create passing test case (same name = flake)
129+
passingCase := &junitapi.JUnitTestCase{
130+
Name: testName,
131+
}
132+
testCases = append(testCases, passingCase)
133+
}
134+
135+
return testCases
136+
}
137+
138+
// buildGlobalNodeFailureMessage creates a detailed message explaining the global node issue.
139+
func buildGlobalNodeFailureMessage(imageTag, binaryPath string, globalLocations []string, totalTests int) string {
140+
var sb strings.Builder
141+
142+
sb.WriteString("\n")
143+
sb.WriteString("╔══════════════════════════════════════════════════════════════════════════════╗\n")
144+
sb.WriteString("║ GLOBAL BEFOREEACH/AFTEREACH DETECTED ║\n")
145+
sb.WriteString("╚══════════════════════════════════════════════════════════════════════════════╝\n")
146+
sb.WriteString("\n")
147+
sb.WriteString(fmt.Sprintf("IMAGE: %s\n", imageTag))
148+
sb.WriteString(fmt.Sprintf("BINARY: %s\n", binaryPath))
149+
sb.WriteString(fmt.Sprintf("TESTS: %d\n", totalTests))
150+
sb.WriteString("\n")
151+
sb.WriteString("PROBLEM: The following code locations appear in ALL tests for this binary,\n")
152+
sb.WriteString("indicating that BeforeEach or AfterEach hooks were registered at the global level.\n")
153+
sb.WriteString("\n")
154+
sb.WriteString("This means these hooks run for EVERY SINGLE TEST even when not needed,\n")
155+
sb.WriteString("wasting CI resources and adding unnecessary test execution time.\n")
156+
sb.WriteString("\n")
157+
sb.WriteString("GLOBAL CODE LOCATIONS:\n")
158+
for _, loc := range globalLocations {
159+
sb.WriteString(fmt.Sprintf(" • %s\n", loc))
160+
}
161+
sb.WriteString("\n")
162+
sb.WriteString("COMMON CAUSES:\n")
163+
sb.WriteString("\n")
164+
sb.WriteString("1. Package-level FixturePath() call:\n")
165+
sb.WriteString(" BAD: var myFixture = exutil.FixturePath(\"testdata\", \"file.yaml\")\n")
166+
sb.WriteString(" GOOD: func myFixture() string { return exutil.FixturePath(\"testdata\", \"file.yaml\") }\n")
167+
sb.WriteString("\n")
168+
sb.WriteString("2. Package-level NewCLI/NewCLIWithoutNamespace call:\n")
169+
sb.WriteString(" BAD: var oc = exutil.NewCLIWithoutNamespace(\"test\")\n")
170+
sb.WriteString(" GOOD: Inside g.Describe(): oc := exutil.NewCLIWithoutNamespace(\"test\")\n")
171+
sb.WriteString("\n")
172+
sb.WriteString("HOW TO FIX:\n")
173+
sb.WriteString("\n")
174+
sb.WriteString("1. Find the file and line number from the locations above\n")
175+
sb.WriteString("2. Look for package-level var declarations that call FixturePath() or NewCLI*()\n")
176+
sb.WriteString("3. Convert them to functions (for FixturePath) or move inside Describe blocks (for CLI)\n")
177+
sb.WriteString("\n")
178+
sb.WriteString("EXAMPLE FIX FOR FixturePath:\n")
179+
sb.WriteString("\n")
180+
sb.WriteString(" // BEFORE (causes global BeforeEach):\n")
181+
sb.WriteString(" var myFixture = exutil.FixturePath(\"testdata\", \"file.yaml\")\n")
182+
sb.WriteString(" func useFixture() { oc.Run(\"apply\").Args(\"-f\", myFixture) }\n")
183+
sb.WriteString("\n")
184+
sb.WriteString(" // AFTER (lazy evaluation, no global BeforeEach):\n")
185+
sb.WriteString(" func myFixture() string { return exutil.FixturePath(\"testdata\", \"file.yaml\") }\n")
186+
sb.WriteString(" func useFixture() { oc.Run(\"apply\").Args(\"-f\", myFixture()) }\n")
187+
sb.WriteString("\n")
188+
sb.WriteString("FOR AI ASSISTANTS:\n")
189+
sb.WriteString("If you're Claude or another AI helping fix this, search the codebase for the file\n")
190+
sb.WriteString("paths shown above. Look for package-level 'var' declarations that call\n")
191+
sb.WriteString("exutil.FixturePath() or exutil.NewCLI*(). Convert FixturePath vars to functions\n")
192+
sb.WriteString("that return the path. Move NewCLI* calls inside Ginkgo Describe/Context blocks.\n")
193+
sb.WriteString("\n")
194+
195+
return sb.String()
196+
}
197+
198+
// allowedGlobalLocations is an exception list of code locations that are known
199+
// to register global BeforeEach/AfterEach nodes. These should be fixed
200+
// unless it is intentional.
201+
var allowedGlobalLocations = map[string]string{
202+
// Add exceptions here with an explanation, e.g.:
203+
// "github.com/openshift/origin/test/extended/networking/util.go:82": "FIXME: package-level FixturePath call",
204+
}
205+
206+
// isExpectedGlobalLocation returns true for code locations that are expected
207+
// to appear in all tests and should not trigger the global node detection.
208+
func isExpectedGlobalLocation(loc string) bool {
209+
// Check exact match in allowlist
210+
if _, ok := allowedGlobalLocations[loc]; ok {
211+
return true
212+
}
213+
214+
// Check pattern matches for framework infrastructure that's legitimately global
215+
expectedPatterns := []string{
216+
// None currently - if we find legitimate cases, add them here with comments
217+
}
218+
219+
for _, pattern := range expectedPatterns {
220+
if strings.Contains(loc, pattern) {
221+
return true
222+
}
223+
}
224+
225+
return false
226+
}

pkg/test/ginkgo/cmd_runsuite.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
220220

221221
logrus.Infof("Discovered %d total tests", len(specs))
222222

223+
// Check for global BeforeEach/AfterEach nodes in each binary
224+
// Returns JUnit test cases as flakes to track the issue without blocking
225+
globalNodeTestCases := extensions.CheckForGlobalNodes(specs)
226+
223227
// skip tests due to newer k8s
224228
restConfig, err := clusterinfo.GetMonitorRESTConfig()
225229
if err != nil {
@@ -618,6 +622,7 @@ func (o *GinkgoRunSuiteOptions) Run(suite *TestSuite, clusterConfig *clusterdisc
618622
var syntheticTestResults []*junitapi.JUnitTestCase
619623
var syntheticFailure bool
620624
syntheticTestResults = append(syntheticTestResults, stableClusterTestResults...)
625+
syntheticTestResults = append(syntheticTestResults, globalNodeTestCases...)
621626

622627
timeSuffix := fmt.Sprintf("_%s", start.UTC().Format("20060102-150405"))
623628

test/extended/image_ecosystem/mysql_replica.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,13 @@ var (
3434
false,
3535
},
3636
}
37-
helperTemplate = exutil.FixturePath("..", "..", "examples", "db-templates", "mysql-ephemeral-template.json")
38-
helperName = "mysql-helper"
37+
helperName = "mysql-helper"
3938
)
4039

40+
func helperTemplate() string {
41+
return exutil.FixturePath("..", "..", "examples", "db-templates", "mysql-ephemeral-template.json")
42+
}
43+
4144
// CreateMySQLReplicationHelpers creates a set of MySQL helpers for master,
4245
// slave and an extra helper that is used for remote login test.
4346
func CreateMySQLReplicationHelpers(c kcoreclient.PodInterface, masterDeployment, slaveDeployment, helperDeployment string, slaveCount int) (exutil.Database, []exutil.Database, exutil.Database) {
@@ -82,7 +85,7 @@ func replicationTestFactory(oc *exutil.CLI, tc testCase, cleanup func()) func()
8285
err = oc.Run("new-app").Args("--template", tc.TemplateName).Execute()
8386
o.Expect(err).NotTo(o.HaveOccurred())
8487

85-
err = oc.Run("new-app").Args("-f", helperTemplate, "-p", fmt.Sprintf("MYSQL_VERSION=%s", tc.Version), "-p", fmt.Sprintf("DATABASE_SERVICE_NAME=%s", helperName)).Execute()
88+
err = oc.Run("new-app").Args("-f", helperTemplate(), "-p", fmt.Sprintf("MYSQL_VERSION=%s", tc.Version), "-p", fmt.Sprintf("DATABASE_SERVICE_NAME=%s", helperName)).Execute()
8689
o.Expect(err).NotTo(o.HaveOccurred())
8790

8891
// oc.KubeFramework().WaitForAnEndpoint currently will wait forever; for now, prefacing with our WaitForADeploymentToComplete,

test/extended/image_ecosystem/postgresql_replica.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,16 @@ import (
2020

2121
var (
2222
postgreSQLReplicationTemplate = "https://raw.githubusercontent.com/sclorg/postgresql-container/master/examples/replica/postgresql_replica.json"
23-
postgreSQLEphemeralTemplate = exutil.FixturePath("..", "..", "examples", "db-templates", "postgresql-ephemeral-template.json")
2423
postgreSQLHelperName = "postgresql-helper"
2524
postgreSQLImages = []string{
2625
"postgresql:9.6",
2726
}
2827
)
2928

29+
func postgreSQLEphemeralTemplate() string {
30+
return exutil.FixturePath("..", "..", "examples", "db-templates", "postgresql-ephemeral-template.json")
31+
}
32+
3033
/*
3134
var _ = g.Describe("[sig-devex][Feature:ImageEcosystem][postgresql][Slow][Local] openshift postgresql replication", func() {
3235
defer g.GinkgoRecover()
@@ -130,7 +133,7 @@ func PostgreSQLReplicationTestFactory(oc *exutil.CLI, image string, cleanup func
130133
err = oc.Run("new-app").Args("--template", "pg-replica-example", "-p", fmt.Sprintf("IMAGESTREAMTAG=%s", image)).Execute()
131134
o.Expect(err).NotTo(o.HaveOccurred())
132135

133-
err = oc.Run("new-app").Args("-f", postgreSQLEphemeralTemplate, "-p", fmt.Sprintf("DATABASE_SERVICE_NAME=%s", postgreSQLHelperName)).Execute()
136+
err = oc.Run("new-app").Args("-f", postgreSQLEphemeralTemplate(), "-p", fmt.Sprintf("DATABASE_SERVICE_NAME=%s", postgreSQLHelperName)).Execute()
134137
o.Expect(err).NotTo(o.HaveOccurred())
135138

136139
g.By("PV/PVC dump after setup")

test/extended/kernel/common.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,18 @@ var (
2323
// - 5.14.0-430.el9.x86_64+rt
2424
// Continue using regex to tighten the match for both versions
2525
realTimeKernelRE = regexp.MustCompile(".*[.+]rt.*")
26-
rtEnvFixture = exutil.FixturePath("testdata", "kernel", "rt-tests-environment.yaml")
27-
rtPodFixture = exutil.FixturePath("testdata", "kernel", "rt-tests-pod.yaml")
2826
rtNamespace = "ci-realtime-testbed"
2927
rtPodName = "rt-tests"
3028
)
3129

30+
func rtEnvFixture() string {
31+
return exutil.FixturePath("testdata", "kernel", "rt-tests-environment.yaml")
32+
}
33+
34+
func rtPodFixture() string {
35+
return exutil.FixturePath("testdata", "kernel", "rt-tests-pod.yaml")
36+
}
37+
3238
func failIfNotRT(oc *exutil.CLI) {
3339
g.By("checking kernel configuration")
3440

@@ -57,14 +63,14 @@ func getRealTimeWorkerNodes(oc *exutil.CLI) (nodes map[string]int, err error) {
5763
// Setup the cluster infra needed for running RT tests
5864
func configureRealtimeTestEnvironment(oc *exutil.CLI) {
5965
g.By("Setting up the privileged environment needed for realtime tests")
60-
err := oc.SetNamespace(rtNamespace).Run("apply").Args("-f", rtEnvFixture).Execute()
66+
err := oc.SetNamespace(rtNamespace).Run("apply").Args("-f", rtEnvFixture()).Execute()
6167
o.Expect(err).NotTo(o.HaveOccurred(), "unable to create namespace and service accounts for rt-tests")
6268
}
6369

6470
// Tear down the infra setup we used for testing
6571
func cleanupRealtimeTestEnvironment(oc *exutil.CLI) {
6672
g.By("Cleaning up the privileged environment needed for realtime tests")
67-
err := oc.SetNamespace(rtNamespace).Run("delete").Args("-f", rtEnvFixture).Execute()
73+
err := oc.SetNamespace(rtNamespace).Run("delete").Args("-f", rtEnvFixture()).Execute()
6874
o.Expect(err).NotTo(o.HaveOccurred(), "unable to clean up the namespace and service accounts for rt-tests")
6975

7076
err = wait.PollImmediate(1*time.Second, 60*time.Second, func() (bool, error) {
@@ -82,7 +88,7 @@ func cleanupRealtimeTestEnvironment(oc *exutil.CLI) {
8288
// Setup the pod that will be used to run the test
8389
func startRtTestPod(oc *exutil.CLI) {
8490
g.By("Setting up the pod needed for realtime tests")
85-
err := oc.SetNamespace(rtNamespace).Run("apply").Args("-f", rtPodFixture).Execute()
91+
err := oc.SetNamespace(rtNamespace).Run("apply").Args("-f", rtPodFixture()).Execute()
8692
o.Expect(err).NotTo(o.HaveOccurred(), "unable to create test pod for rt-tests")
8793

8894
// Wait for the container to be ready to go
@@ -93,7 +99,7 @@ func startRtTestPod(oc *exutil.CLI) {
9399
// Cleanup the pod used for the test
94100
func cleanupRtTestPod(oc *exutil.CLI) {
95101
g.By("Cleaning up the pod needed for realtime tests")
96-
err := oc.SetNamespace(rtNamespace).Run("delete").Args("-f", rtPodFixture).Execute()
102+
err := oc.SetNamespace(rtNamespace).Run("delete").Args("-f", rtPodFixture()).Execute()
97103
o.Expect(err).NotTo(o.HaveOccurred(), "unable to clean up test pod for rt-tests")
98104

99105
// Wait for the container to be ready to go

test/extended/networking/ipsec.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ var _ = g.Describe("[sig-network][Feature:IPsec]", g.Ordered, func() {
505505
// nodes don't go for a reboot while rolling out `99-worker-north-south-ipsec-config`
506506
// machine config which configures certificates for testing IPsec north south traffic.
507507
g.By("deploy machine configuration policy")
508-
err = oc.AsAdmin().Run("apply").Args("-f", nsNodeRebootNoneFixture).Execute()
508+
err = oc.AsAdmin().Run("apply").Args("-f", nsNodeRebootNoneFixture()).Execute()
509509
o.Expect(err).NotTo(o.HaveOccurred())
510510
mg.WaitForBootImageControllerToComplete(oc)
511511

test/extended/networking/util.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,20 @@ const (
7979
var (
8080
masterRoleMachineConfigLabel = map[string]string{"machineconfiguration.openshift.io/role": "master"}
8181
workerRoleMachineConfigLabel = map[string]string{"machineconfiguration.openshift.io/role": "worker"}
82-
ipsecConfigurationBaseDir = exutil.FixturePath("testdata", "ipsec")
83-
nsMachineConfigFixture = filepath.Join(ipsecConfigurationBaseDir, "nsconfig-machine-config.yaml")
84-
nsNodeRebootNoneFixture = filepath.Join(ipsecConfigurationBaseDir, "nsconfig-reboot-none-policy.yaml")
8582
)
8683

84+
func ipsecConfigurationBaseDir() string {
85+
return exutil.FixturePath("testdata", "ipsec")
86+
}
87+
88+
func nsMachineConfigFixture() string {
89+
return filepath.Join(ipsecConfigurationBaseDir(), "nsconfig-machine-config.yaml")
90+
}
91+
92+
func nsNodeRebootNoneFixture() string {
93+
return filepath.Join(ipsecConfigurationBaseDir(), "nsconfig-reboot-none-policy.yaml")
94+
}
95+
8796
// IsIPv6 returns true if a group of ips are ipv6.
8897
func isIpv6(ip []string) bool {
8998
ipv6 := false
@@ -648,7 +657,7 @@ func createIPsecCertsMachineConfig(oc *exutil.CLI) (*mcfgv1.MachineConfig, error
648657
if err == nil {
649658
return nsCertMachineConfig, nil
650659
}
651-
err = oc.AsAdmin().Run("create").Args("-f", nsMachineConfigFixture).Execute()
660+
err = oc.AsAdmin().Run("create").Args("-f", nsMachineConfigFixture()).Execute()
652661
if err != nil {
653662
return nil, fmt.Errorf("error deploying IPsec certs Machine Config: %v", err)
654663
}

0 commit comments

Comments
 (0)