From 5cf2547d64d16e86016ce1d1ae032cdbe10b0b6d Mon Sep 17 00:00:00 2001 From: Weinan Liu Date: Thu, 5 Mar 2026 00:44:37 +0800 Subject: [PATCH] Skip dockercfg secret wait when image-registry pods unhealthy on Windows clusters When image-registry pods exist but none are Running and Ready, skip the dockercfg secret wait only on Windows (WINC) clusters. This targets debug-winc-vsphere Prow CI jobs where the SA token controller is known to be broken/disabled, causing dockercfg secrets to never be created. On non-Windows clusters, the normal wait proceeds unchanged so real infrastructure failures are not silently ignored. Fixes: https://issues.redhat.com/browse/WINC-1578 --- test/extended/util/client.go | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/extended/util/client.go b/test/extended/util/client.go index 409ba48546fd..4f183e922acb 100644 --- a/test/extended/util/client.go +++ b/test/extended/util/client.go @@ -418,6 +418,46 @@ func (c *CLI) setupProject() string { defaultRoleBindings = []string{} } + // Even if the ImageRegistry capability is enabled, the image-registry pods + // may not be healthy on Windows (WINC) debug clusters where the SA token + // controller is broken/disabled (e.g., debug-winc-vsphere Prow CI jobs), + // causing dockercfg secrets to never be created. Detect this by checking + // pod health and the presence of Windows nodes, and skip the wait to avoid + // a 3-minute timeout per SA. + if imageRegistryEnabled { + pods, podErr := c.AdminKubeClient().CoreV1().Pods("openshift-image-registry").List( + context.Background(), + metav1.ListOptions{LabelSelector: "docker-registry=default"}, + ) + if podErr == nil && len(pods.Items) > 0 { + hasHealthyPod := false + for _, pod := range pods.Items { + if pod.Status.Phase == corev1.PodRunning { + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + hasHealthyPod = true + break + } + } + } + if hasHealthyPod { + break + } + } + if !hasHealthyPod { + windowsNodes, winErr := c.AdminKubeClient().CoreV1().Nodes().List( + context.Background(), + metav1.ListOptions{LabelSelector: "kubernetes.io/os=windows"}, + ) + if winErr == nil && len(windowsNodes.Items) > 0 { + framework.Logf("Windows cluster with unhealthy image-registry pods, skipping dockercfg secret check") + DefaultServiceAccounts = []string{} + defaultRoleBindings = []string{} + } + } + } + } + for _, sa := range DefaultServiceAccounts { framework.Logf("Waiting for ServiceAccount %q to be provisioned...", sa) err = WaitForServiceAccountWithSecret(c.AdminConfigClient().ConfigV1().ClusterVersions(), c.KubeClient().CoreV1().ServiceAccounts(newNamespace), sa)