Skip to content

Commit a342e55

Browse files
ricky-rav and claude committed
Fix FRR pods unable to reach Kubernetes API during bootstrap
FRR pods use hostNetwork: true but were trying to reach the Kubernetes API at the service IP (172.30.0.1). During bootstrap, this service IP is not routable because the CNI (OVN-K) is not running yet, creating a deadlock: CNO waits for FRR webhook ready -> FRR pods can't reach API at 172.30.0.1 -> Service IP needs CNI routing -> CNI waits for FRR webhook -> DEADLOCK.

Add KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT environment variables to FRR pods, following the pattern used by other CNO hostNetwork components (OVN-K, multus, SDN). These env vars override the default service IP with the actual API server address.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Riccardo Ravaioli <rravaiol@redhat.com>
1 parent 9821a1e commit a342e55

File tree

6 files changed

+27
-8
lines changed

6 files changed

+27
-8
lines changed

bindata/network/frr-k8s/003-pki.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
# Both webhook and metrics need OperatorPKI because the FRR DaemonSet
88
# requires the metrics TLS secret to start, and service-ca is not
99
# available during bootstrap (it depends on CNI being ready).
10-
---
1110
apiVersion: network.operator.openshift.io/v1
1211
kind: OperatorPKI
1312
metadata:

bindata/network/frr-k8s/frr-k8s.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ spec:
100100
- --metrics-bind-address=127.0.0.1:7572
101101
- $(LOG_LEVEL)
102102
env:
103+
- name: KUBERNETES_SERVICE_HOST
104+
value: "{{.KUBERNETES_SERVICE_HOST}}"
105+
- name: KUBERNETES_SERVICE_PORT
106+
value: "{{.KUBERNETES_SERVICE_PORT}}"
103107
- name: FRR_CONFIG_FILE
104108
value: /etc/frr_reloader/frr.conf
105109
- name: FRR_RELOADER_PID_FILE
@@ -113,7 +117,7 @@ spec:
113117
configMapKeyRef:
114118
name: env-overrides
115119
key: frrk8s-loglevel
116-
optional: true
120+
optional: true
117121
- name: NAMESPACE
118122
valueFrom:
119123
fieldRef:
@@ -259,6 +263,10 @@ spec:
259263
command:
260264
- /etc/frr_status/frr-status
261265
env:
266+
- name: KUBERNETES_SERVICE_HOST
267+
value: "{{.KUBERNETES_SERVICE_HOST}}"
268+
- name: KUBERNETES_SERVICE_PORT
269+
value: "{{.KUBERNETES_SERVICE_PORT}}"
262270
- name: NODE_NAME
263271
valueFrom:
264272
fieldRef:

bindata/network/frr-k8s/node-status-cleaner.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ spec:
3030
- --frrk8s-selector=component=frr-k8s
3131
- $(LOG_LEVEL)
3232
env:
33+
- name: KUBERNETES_SERVICE_HOST
34+
value: "{{.KUBERNETES_SERVICE_HOST}}"
35+
- name: KUBERNETES_SERVICE_PORT
36+
value: "{{.KUBERNETES_SERVICE_PORT}}"
3337
- name: NAMESPACE
3438
valueFrom:
3539
fieldRef:

bindata/network/frr-k8s/webhook.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
---
21
apiVersion: v1
32
kind: Service
43
metadata:
@@ -37,4 +36,3 @@ webhooks:
3736
resources:
3837
- frrconfigurations
3938
sideEffects: None
40-
---

pkg/network/render.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ func Render(operConf *operv1.NetworkSpec, clusterConf *configv1.NetworkSpec, man
235235
}
236236
objs = append(objs, o...)
237237

238-
o, err = renderAdditionalRoutingCapabilities(operConf, manifestDir, client)
238+
o, err = renderAdditionalRoutingCapabilities(operConf, manifestDir, client, bootstrapResult)
239239
if err != nil {
240240
return nil, progressing, err
241241
}
@@ -1074,7 +1074,7 @@ func isSupportedDualStackPlatform(platformType configv1.PlatformType) bool {
10741074
return dualStackPlatforms.Has(string(platformType))
10751075
}
10761076

1077-
func renderAdditionalRoutingCapabilities(conf *operv1.NetworkSpec, manifestDir string, client cnoclient.Client) ([]*uns.Unstructured, error) {
1077+
func renderAdditionalRoutingCapabilities(conf *operv1.NetworkSpec, manifestDir string, client cnoclient.Client, bootstrapResult *bootstrap.BootstrapResult) ([]*uns.Unstructured, error) {
10781078
if conf == nil || conf.AdditionalRoutingCapabilities == nil {
10791079
return nil, nil
10801080
}
@@ -1087,6 +1087,16 @@ func renderAdditionalRoutingCapabilities(conf *operv1.NetworkSpec, manifestDir s
10871087
data.Data["KubeRBACProxyImage"] = os.Getenv("KUBE_RBAC_PROXY_IMAGE")
10881088
data.Data["ReleaseVersion"] = os.Getenv("RELEASE_VERSION")
10891089

1090+
// Add Kubernetes API server host/port for hostNetwork pods.
1091+
// During bootstrap, the service IP (172.30.0.1) is not routable because
1092+
// the CNI is not yet running. These env vars allow FRR pods to connect
1093+
// to the API server directly using the actual API server address.
1094+
if bootstrapResult != nil {
1095+
apiServer := bootstrapResult.Infra.APIServers[bootstrap.APIServerDefault]
1096+
data.Data["KUBERNETES_SERVICE_HOST"] = apiServer.Host
1097+
data.Data["KUBERNETES_SERVICE_PORT"] = apiServer.Port
1098+
}
1099+
10901100
// Fetch the webhook CA bundle from the ConfigMap created by OperatorPKI
10911101
caBundle := getFRRK8sWebhookCABundle(client)
10921102
data.Data["FRRK8sWebhookCABundle"] = caBundle

pkg/network/render_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -638,13 +638,13 @@ func Test_renderAdditionalRoutingCapabilities(t *testing.T) {
638638
},
639639
},
640640
},
641-
want: 22, // 19 original + 2 OperatorPKI (webhook + metrics) + 1 document separator
641+
want: 21, // 19 original + 2 OperatorPKI (webhook + metrics)
642642
expectedErr: nil,
643643
},
644644
}
645645
for _, tt := range tests {
646646
t.Run(tt.name, func(t *testing.T) {
647-
got, err := renderAdditionalRoutingCapabilities(tt.args.operConf, manifestDir, nil)
647+
got, err := renderAdditionalRoutingCapabilities(tt.args.operConf, manifestDir, nil, fakeBootstrapResult())
648648
if !reflect.DeepEqual(tt.expectedErr, err) {
649649
t.Errorf("renderAdditionalRoutingCapabilities() err = %v, want %v", err, tt.expectedErr)
650650
}

0 commit comments

Comments
 (0)