Skip to content

Commit 006ca7d

Browse files
committed
Fix FRR pods unable to reach Kubernetes API during bootstrap
FRR pods use hostNetwork: true but were trying to reach the Kubernetes API at the service IP (172.30.0.1), which kubelet auto-injects as KUBERNETES_SERVICE_HOST. During bootstrap, this service IP is not routable because the CNI (OVN-K) is not running yet, creating a deadlock: CNO waits for FRR webhook -> FRR pods can't reach API at 172.30.0.1 -> Service IP needs CNI routing -> CNI waits for FRR -> DEADLOCK.

Add KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT environment variables to FRR pods, overriding the kubelet-injected values with the actual API server address (the API VIP, e.g., 192.168.111.5). Since FRR pods use hostNetwork, they can reach the API VIP directly via L2 without needing CNI routing, breaking the deadlock.

This follows the pattern used by other CNO hostNetwork components (ovnkube-node, multus, sdn).

Signed-off-by: Riccardo Ravaioli <rravaiol@redhat.com>
1 parent cc63cdb commit 006ca7d

File tree

4 files changed

+27
-5
lines changed

4 files changed

+27
-5
lines changed

bindata/network/frr-k8s/frr-k8s.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ spec:
100100
- --metrics-bind-address=127.0.0.1:7572
101101
- $(LOG_LEVEL)
102102
env:
103+
- name: KUBERNETES_SERVICE_HOST
104+
value: "{{.KUBERNETES_SERVICE_HOST}}"
105+
- name: KUBERNETES_SERVICE_PORT
106+
value: "{{.KUBERNETES_SERVICE_PORT}}"
103107
- name: FRR_CONFIG_FILE
104108
value: /etc/frr_reloader/frr.conf
105109
- name: FRR_RELOADER_PID_FILE
@@ -113,7 +117,7 @@ spec:
113117
configMapKeyRef:
114118
name: env-overrides
115119
key: frrk8s-loglevel
116-
optional: true
120+
optional: true
117121
- name: NAMESPACE
118122
valueFrom:
119123
fieldRef:
@@ -259,6 +263,10 @@ spec:
259263
command:
260264
- /etc/frr_status/frr-status
261265
env:
266+
- name: KUBERNETES_SERVICE_HOST
267+
value: "{{.KUBERNETES_SERVICE_HOST}}"
268+
- name: KUBERNETES_SERVICE_PORT
269+
value: "{{.KUBERNETES_SERVICE_PORT}}"
262270
- name: NODE_NAME
263271
valueFrom:
264272
fieldRef:

bindata/network/frr-k8s/node-status-cleaner.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ spec:
3030
- --frrk8s-selector=component=frr-k8s
3131
- $(LOG_LEVEL)
3232
env:
33+
- name: KUBERNETES_SERVICE_HOST
34+
value: "{{.KUBERNETES_SERVICE_HOST}}"
35+
- name: KUBERNETES_SERVICE_PORT
36+
value: "{{.KUBERNETES_SERVICE_PORT}}"
3337
- name: NAMESPACE
3438
valueFrom:
3539
fieldRef:

pkg/network/render.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ func Render(operConf *operv1.NetworkSpec, clusterConf *configv1.NetworkSpec, man
235235
}
236236
objs = append(objs, o...)
237237

238-
o, err = renderAdditionalRoutingCapabilities(operConf, manifestDir, client)
238+
o, err = renderAdditionalRoutingCapabilities(operConf, manifestDir, client, bootstrapResult)
239239
if err != nil {
240240
return nil, progressing, err
241241
}
@@ -1074,7 +1074,7 @@ func isSupportedDualStackPlatform(platformType configv1.PlatformType) bool {
10741074
return dualStackPlatforms.Has(string(platformType))
10751075
}
10761076

1077-
func renderAdditionalRoutingCapabilities(conf *operv1.NetworkSpec, manifestDir string, client cnoclient.Client) ([]*uns.Unstructured, error) {
1077+
func renderAdditionalRoutingCapabilities(conf *operv1.NetworkSpec, manifestDir string, client cnoclient.Client, bootstrapResult *bootstrap.BootstrapResult) ([]*uns.Unstructured, error) {
10781078
if conf == nil || conf.AdditionalRoutingCapabilities == nil {
10791079
return nil, nil
10801080
}
@@ -1087,6 +1087,16 @@ func renderAdditionalRoutingCapabilities(conf *operv1.NetworkSpec, manifestDir s
10871087
data.Data["KubeRBACProxyImage"] = os.Getenv("KUBE_RBAC_PROXY_IMAGE")
10881088
data.Data["ReleaseVersion"] = os.Getenv("RELEASE_VERSION")
10891089

1090+
// Add Kubernetes API server host/port for hostNetwork pods.
1091+
// During bootstrap, the service IP (172.30.0.1) is not routable because
1092+
// the CNI is not yet running. These env vars allow FRR pods to connect
1093+
// to the API server directly using the actual API server address.
1094+
if bootstrapResult != nil {
1095+
apiServer := bootstrapResult.Infra.APIServers[bootstrap.APIServerDefault]
1096+
data.Data["KUBERNETES_SERVICE_HOST"] = apiServer.Host
1097+
data.Data["KUBERNETES_SERVICE_PORT"] = apiServer.Port
1098+
}
1099+
10901100
// Fetch the webhook CA bundle from the ConfigMap created by OperatorPKI
10911101
caBundle := getFRRK8sWebhookCABundle(client)
10921102
data.Data["FRRK8sWebhookCABundle"] = caBundle

pkg/network/render_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -638,13 +638,13 @@ func Test_renderAdditionalRoutingCapabilities(t *testing.T) {
638638
},
639639
},
640640
},
641-
want: 22, // 19 original + 2 OperatorPKI (webhook + metrics) + 1 document separator
641+
want: 21, // 19 original + 2 OperatorPKI (webhook + metrics)
642642
expectedErr: nil,
643643
},
644644
}
645645
for _, tt := range tests {
646646
t.Run(tt.name, func(t *testing.T) {
647-
got, err := renderAdditionalRoutingCapabilities(tt.args.operConf, manifestDir, nil)
647+
got, err := renderAdditionalRoutingCapabilities(tt.args.operConf, manifestDir, nil, fakeBootstrapResult())
648648
if !reflect.DeepEqual(tt.expectedErr, err) {
649649
t.Errorf("renderAdditionalRoutingCapabilities() err = %v, want %v", err, tt.expectedErr)
650650
}

0 commit comments

Comments
 (0)