Skip to content

Commit f709c58

Browse files
committed
Allowlist controller: add node reconciler to sync allowlist on node creation
When a node joins the cluster, create a Job on that specific node to sync the CNI sysctl allowlist configuration. Implementation: - Watch node creation events - Skip any action if ConfigMap unchanged from the default (multus daemon installs default) - Create Job with nodeSelector targeting specific node - If job AlreadyExists, follow up on the result - Delete successful Jobs, preserve failed Jobs for debugging Why not the daemonset approach: 1. Jobs run only on the new node (1 pod) while DaemonSet runs on all nodes (100+ pods), so 10 new nodes create 10 Jobs vs 1000 pods. 2. Each Job succeeds or fails independently with logs kept for 24 hours, while DaemonSet waits for all pods to be ready so one stuck pod blocks the entire update. 3. Multiple Jobs run in parallel without conflicts, while DaemonSet needs complex logic to handle multiple node events with delays and state checking. 4. ConfigMap changes use DaemonSet (update all nodes), node additions use Jobs (update one node), each approach fits its purpose. Known issues: 1. On restart, both the Jobs and the DaemonSet will run at the same time. 2. On restart (or if the new node is a master node) the Job will fail after 10 minutes because we do not set tolerations, by design. Assisted-By: Claude <noreply@anthropic.com> Signed-off-by: Konstantinos Karampogias <karampok@gmail.com>
1 parent 74e0cb5 commit f709c58

File tree

11 files changed

+1509
-3
lines changed

11 files changed

+1509
-3
lines changed

go.mod

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,13 @@ require (
4242
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
4343
github.com/felixge/httpsnoop v1.0.4 // indirect
4444
github.com/fsnotify/fsnotify v1.9.0 // indirect
45-
github.com/go-logr/logr v1.4.3 // indirect
45+
github.com/go-logr/logr v1.4.3
4646
github.com/go-openapi/jsonpointer v0.22.1 // indirect
4747
github.com/go-openapi/jsonreference v0.21.2 // indirect
4848
github.com/go-openapi/swag v0.25.1 // indirect
4949
github.com/gogo/protobuf v1.3.2 // indirect
5050
github.com/golang/protobuf v1.5.4 // indirect
51-
github.com/google/go-cmp v0.7.0 // indirect
51+
github.com/google/go-cmp v0.7.0
5252
github.com/google/uuid v1.6.0 // indirect
5353
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.1-0.20210315223345-82c243799c99 // indirect
5454
github.com/huandu/xstrings v1.4.0 // indirect
@@ -98,7 +98,7 @@ require (
9898
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.34.0 // indirect
9999
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
100100
sigs.k8s.io/kube-storage-version-migrator v0.0.6-0.20230721195810-5c8923c5ff96 // indirect
101-
sigs.k8s.io/yaml v1.6.0 // indirect
101+
sigs.k8s.io/yaml v1.6.0
102102
)
103103

104104
require (

pkg/controller/add_networkconfig.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ func init() {
2727
ingressconfig.Add,
2828
infrastructureconfig.Add,
2929
allowlist.Add,
30+
allowlist.AddNodeReconciler,
3031
dashboards.Add,
3132
)
3233
}
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
package allowlist
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"time"
8+
9+
cnoclient "github.com/openshift/cluster-network-operator/pkg/client"
10+
"github.com/openshift/cluster-network-operator/pkg/controller/statusmanager"
11+
"github.com/openshift/cluster-network-operator/pkg/names"
12+
"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"
13+
batchv1 "k8s.io/api/batch/v1"
14+
corev1 "k8s.io/api/core/v1"
15+
"k8s.io/apimachinery/pkg/api/equality"
16+
apierrors "k8s.io/apimachinery/pkg/api/errors"
17+
"k8s.io/apimachinery/pkg/api/resource"
18+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
19+
"k8s.io/apimachinery/pkg/types"
20+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
21+
"k8s.io/klog/v2"
22+
"k8s.io/utils/ptr"
23+
24+
crclient "sigs.k8s.io/controller-runtime/pkg/client"
25+
"sigs.k8s.io/controller-runtime/pkg/controller"
26+
"sigs.k8s.io/controller-runtime/pkg/event"
27+
"sigs.k8s.io/controller-runtime/pkg/handler"
28+
"sigs.k8s.io/controller-runtime/pkg/manager"
29+
"sigs.k8s.io/controller-runtime/pkg/predicate"
30+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
31+
"sigs.k8s.io/controller-runtime/pkg/source"
32+
)
33+
34+
const (
	// allowlistJobTTL is the time to keep finished Jobs before automatic cleanup (24 hours)
	allowlistJobTTL = 86400
	// allowlistJobActiveDeadline is the maximum time a Job can run before termination (10 minutes)
	allowlistJobActiveDeadline = 600
	// reconcilerID is the identifier prefix for log messages
	reconcilerID = "Allowlist node reconciler:"
)

// Compile-time assertion that *ReconcileNode satisfies reconcile.Reconciler.
var _ reconcile.Reconciler = &ReconcileNode{}

// ReconcileNode handles Node creation events: for each new node it creates a
// one-shot Job (see newAllowlistJobFor) that copies the CNI sysctl allowlist
// configuration onto that node.
type ReconcileNode struct {
	// client provides cluster access for reading ConfigMaps and managing Jobs.
	client cnoclient.Client
	// status reports operator status; used to set Degraded on panic in Reconcile.
	status *statusmanager.StatusManager
}
49+
50+
// AddNodeReconciler creates a new node reconciler and adds it to the manager.
51+
// The node reconciler watches for node creation events and syncs the allowlist
52+
// to all nodes when a new node joins the cluster.
53+
func AddNodeReconciler(mgr manager.Manager, status *statusmanager.StatusManager, client cnoclient.Client, _ featuregates.FeatureGate) error {
54+
r := &ReconcileNode{client: client, status: status}
55+
c, err := controller.New("allowlist-node-controller", mgr, controller.Options{Reconciler: r})
56+
if err != nil {
57+
return err
58+
}
59+
60+
// Watch when nodes are created.
61+
// When a new node joins the cluster, reconcile to deploy the allowlist file to the new node.
62+
return c.Watch(
63+
source.Kind[crclient.Object](
64+
mgr.GetCache(),
65+
&corev1.Node{},
66+
&handler.EnqueueRequestForObject{},
67+
nodePredicate(),
68+
),
69+
)
70+
}
71+
72+
// Reconcile responds to a Node creation event (request.Name is the node name).
// It compares the live allowlist ConfigMap against the default one and, if
// they differ, ensures a Job exists that copies the allowlist onto the node.
// Completed Jobs are deleted; failed Jobs are left in place for debugging
// (the Job's TTL eventually cleans them up); in-flight Jobs cause a requeue.
func (r *ReconcileNode) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
	// Report Degraded status if this reconcile panics, then re-panic.
	defer utilruntime.HandleCrash(r.status.SetDegradedOnPanicAndCrash)

	// Fetch the default allowlist ConfigMap; returning the error requeues
	// the request until it becomes available.
	defaultCM := &corev1.ConfigMap{}
	if err := r.client.Default().CRClient().Get(ctx,
		types.NamespacedName{Name: names.DefaultAllowlistConfigName, Namespace: names.MultusNamespace},
		defaultCM); err != nil {
		klog.Infof("%s no default ConfigMap %v found", reconcilerID, err)
		return reconcile.Result{}, err
	}

	// Fetch the active allowlist ConfigMap. A missing ConfigMap is not an
	// error: there is simply nothing to sync, so stop without requeueing.
	allowlistCM := &corev1.ConfigMap{}
	if err := r.client.Default().CRClient().Get(ctx,
		types.NamespacedName{Name: names.AllowlistConfigName, Namespace: names.MultusNamespace},
		allowlistCM); err != nil {
		return reconcile.Result{}, crclient.IgnoreNotFound(err)
	}

	// Skip job creation if allowlist matches default configuration.
	// The multus daemon already installs the default configmap on new nodes,
	// so we only need to run a job when the allowlist has been customized.
	if equality.Semantic.DeepEqual(allowlistCM.Data, defaultCM.Data) {
		klog.Infof("%s ConfigMaps are identical, skipping job creation for node %s", reconcilerID, request.Name)
		return reconcile.Result{}, nil
	}

	nodeName := request.Name

	// Job name includes ConfigMap ResourceVersion to ensure old jobs with stale
	// configs aren't reused when the allowlist is updated.
	job := newAllowlistJobFor(nodeName, allowlistCM.ResourceVersion)
	createErr := r.client.Default().CRClient().Create(ctx, job)

	// Handle creation errors (excluding AlreadyExists)
	if createErr != nil && !apierrors.IsAlreadyExists(createErr) {
		klog.Infof("%s failed to create job %s: %v", reconcilerID, job.Name, createErr)
		return reconcile.Result{}, createErr
	}

	// Job created successfully - requeue to check status later
	if createErr == nil {
		klog.Infof("%s job %s created", reconcilerID, job.Name)
		return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
	}

	// Job already exists - fetch it to check status immediately
	// (the Get overwrites `job` with the live object, including Status).
	if err := r.client.Default().CRClient().Get(ctx,
		types.NamespacedName{Name: job.Name, Namespace: names.MultusNamespace}, job); err != nil {
		klog.Infof("%s failed to get existing job %s: %v", reconcilerID, job.Name, err)
		return reconcile.Result{}, err
	}

	// Check job status
	for _, cond := range job.Status.Conditions {
		// Success: delete the Job (background propagation also removes pods).
		if cond.Type == batchv1.JobComplete && cond.Status == corev1.ConditionTrue {
			klog.Infof("%s job %s completed successfully, cleaning up", reconcilerID, job.Name)
			err := r.client.Default().CRClient().Delete(ctx, job,
				crclient.PropagationPolicy(metav1.DeletePropagationBackground))
			return reconcile.Result{}, crclient.IgnoreNotFound(err)
		}
		// Failure: keep the Job so its logs can be inspected; the Job TTL
		// (allowlistJobTTL) cleans it up automatically. No requeue.
		if (cond.Type == batchv1.JobFailureTarget || cond.Type == batchv1.JobFailed) &&
			cond.Status == corev1.ConditionTrue {
			klog.Infof("%s job %s failed: %s (preserved for debugging, TTL cleanup in 24h)",
				reconcilerID, job.Name, cond.Reason)
			return reconcile.Result{}, nil
		}
	}

	// Neither complete nor failed yet - poll again after a short delay.
	klog.Infof("%s job %s is in progress", reconcilerID, job.Name)

	return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
}
144+
145+
// nodePredicate returns a predicate that filters Node events.
146+
// Only node creations trigger reconciliation to distribute config to new nodes.
147+
func nodePredicate() predicate.Predicate {
148+
return predicate.Funcs{
149+
CreateFunc: func(_ event.CreateEvent) bool {
150+
return true
151+
},
152+
UpdateFunc: func(_ event.UpdateEvent) bool {
153+
return false
154+
},
155+
DeleteFunc: func(_ event.DeleteEvent) bool {
156+
return false
157+
},
158+
}
159+
}
160+
161+
func newAllowlistJobFor(nodeName string, configMapVersion string) *batchv1.Job {
162+
jobName := fmt.Sprintf("cni-sysctl-allowlist-%.32s-%.8s", nodeName, configMapVersion)
163+
return &batchv1.Job{
164+
ObjectMeta: metav1.ObjectMeta{
165+
Name: jobName,
166+
Namespace: names.MultusNamespace,
167+
Labels: map[string]string{
168+
"app": "cni-sysctl-allowlist-job",
169+
"node": nodeName,
170+
},
171+
},
172+
Spec: batchv1.JobSpec{
173+
BackoffLimit: ptr.To(int32(3)),
174+
TTLSecondsAfterFinished: ptr.To(int32(allowlistJobTTL)),
175+
ActiveDeadlineSeconds: ptr.To(int64(allowlistJobActiveDeadline)),
176+
Template: corev1.PodTemplateSpec{
177+
ObjectMeta: metav1.ObjectMeta{
178+
Labels: map[string]string{
179+
"app": "cni-sysctl-allowlist-job",
180+
"node": nodeName,
181+
},
182+
Annotations: map[string]string{
183+
"target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
184+
},
185+
},
186+
Spec: corev1.PodSpec{
187+
RestartPolicy: corev1.RestartPolicyNever,
188+
PriorityClassName: "openshift-user-critical",
189+
NodeSelector: map[string]string{
190+
"kubernetes.io/hostname": nodeName,
191+
},
192+
Containers: []corev1.Container{
193+
{
194+
Name: "kube-multus-additional-cni-plugins",
195+
Image: os.Getenv("MULTUS_IMAGE"),
196+
Command: []string{"/bin/bash", "-c", "cp /entrypoint/allowlist.conf /host/etc/cni/tuning/"},
197+
Resources: corev1.ResourceRequirements{
198+
Requests: corev1.ResourceList{
199+
corev1.ResourceCPU: resource.MustParse("10m"),
200+
corev1.ResourceMemory: resource.MustParse("10Mi"),
201+
},
202+
},
203+
SecurityContext: &corev1.SecurityContext{
204+
Privileged: ptr.To(true),
205+
},
206+
TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
207+
VolumeMounts: []corev1.VolumeMount{
208+
{
209+
Name: "cni-sysctl-allowlist",
210+
MountPath: "/entrypoint",
211+
},
212+
{
213+
Name: "tuning-conf-dir",
214+
MountPath: "/host/etc/cni/tuning/",
215+
ReadOnly: false,
216+
},
217+
},
218+
},
219+
},
220+
Volumes: []corev1.Volume{
221+
{
222+
Name: "cni-sysctl-allowlist",
223+
VolumeSource: corev1.VolumeSource{
224+
ConfigMap: &corev1.ConfigMapVolumeSource{
225+
LocalObjectReference: corev1.LocalObjectReference{
226+
Name: names.AllowlistConfigName,
227+
},
228+
DefaultMode: ptr.To(int32(0644)),
229+
},
230+
},
231+
},
232+
{
233+
Name: "tuning-conf-dir",
234+
VolumeSource: corev1.VolumeSource{
235+
HostPath: &corev1.HostPathVolumeSource{
236+
Path: "/etc/cni/tuning/",
237+
Type: ptr.To(corev1.HostPathDirectoryOrCreate),
238+
},
239+
},
240+
},
241+
},
242+
},
243+
},
244+
},
245+
}
246+
}

0 commit comments

Comments
 (0)