package allowlist

import (
	"context"
	"fmt"
	"os"
	"time"

	cnoclient "github.com/openshift/cluster-network-operator/pkg/client"
	"github.com/openshift/cluster-network-operator/pkg/controller/statusmanager"
	"github.com/openshift/cluster-network-operator/pkg/names"
	"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"
	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/equality"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/klog/v2"
	"k8s.io/utils/ptr"

	crclient "sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/manager"
	"sigs.k8s.io/controller-runtime/pkg/predicate"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
	"sigs.k8s.io/controller-runtime/pkg/source"
)

const (
	// allowlistJobTTL is how long, in seconds, finished Jobs are kept before automatic cleanup (24 hours).
	allowlistJobTTL = 86400
	// allowlistJobActiveDeadline is the maximum time, in seconds, a Job may run before it is terminated (10 minutes).
	allowlistJobActiveDeadline = 600
	// reconcilerID is the identifier prefix for log messages.
	reconcilerID = "Allowlist node reconciler:"
)

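// Compile-time check that ReconcileNode implements reconcile.Reconciler.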
var _ reconcile.Reconciler = &ReconcileNode{}

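// ReconcileNode reacts to node creation events and makes sure the CNI sysctl
// allowlist reaches each newly joined node.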
type ReconcileNode struct {
	client cnoclient.Client
	status *statusmanager.StatusManager
}

// AddNodeReconciler creates a new node reconciler and adds it to the manager.
// The node reconciler watches for node creation events and, when a new node
// joins the cluster, syncs the allowlist to that node.
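//
// A minimal wiring sketch (hypothetical caller; mgr, status, client, and
// featureGates are assumed to already exist during operator start-up):
//
//	if err := allowlist.AddNodeReconciler(mgr, status, client, featureGates); err != nil {
//		return fmt.Errorf("failed to add allowlist node reconciler: %w", err)
//	}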
func AddNodeReconciler(mgr manager.Manager, status *statusmanager.StatusManager, client cnoclient.Client, _ featuregates.FeatureGate) error {
	r := &ReconcileNode{client: client, status: status}
	c, err := controller.New("allowlist-node-controller", mgr, controller.Options{Reconciler: r})
	if err != nil {
		return err
	}

	// Watch when nodes are created.
	// When a new node joins the cluster, reconcile to deploy the allowlist file to the new node.
	return c.Watch(
		source.Kind[crclient.Object](
			mgr.GetCache(),
			&corev1.Node{},
			&handler.EnqueueRequestForObject{},
			nodePredicate(),
		),
	)
}

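// Reconcile handles a single node event. If the allowlist ConfigMap differs
// from the operator default, it creates a one-shot Job pinned to the node and
// requeues until that Job completes (then cleaned up) or fails (then kept for
// debugging).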
func (r *ReconcileNode) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
	defer utilruntime.HandleCrash(r.status.SetDegradedOnPanicAndCrash)

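	// Fetch the operator default allowlist; it is the baseline the current
	// allowlist is compared against below.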
	defaultCM := &corev1.ConfigMap{}
	if err := r.client.Default().CRClient().Get(ctx,
		types.NamespacedName{Name: names.DefaultAllowlistConfigName, Namespace: names.MultusNamespace},
		defaultCM); err != nil {
		klog.Infof("%s failed to get default ConfigMap: %v", reconcilerID, err)
		return reconcile.Result{}, err
	}

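	// Fetch the current allowlist. If it does not exist yet there is nothing
	// to sync, so NotFound is not treated as an error.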
	allowlistCM := &corev1.ConfigMap{}
	if err := r.client.Default().CRClient().Get(ctx,
		types.NamespacedName{Name: names.AllowlistConfigName, Namespace: names.MultusNamespace},
		allowlistCM); err != nil {
		return reconcile.Result{}, crclient.IgnoreNotFound(err)
	}

	// Skip job creation if the allowlist matches the default configuration.
	// The multus daemon already installs the default configmap on new nodes,
	// so we only need to run a job when the allowlist has been customized.
	if equality.Semantic.DeepEqual(allowlistCM.Data, defaultCM.Data) {
		klog.Infof("%s ConfigMaps are identical, skipping job creation for node %s", reconcilerID, request.Name)
		return reconcile.Result{}, nil
	}

	nodeName := request.Name

	// The Job name includes the ConfigMap ResourceVersion to ensure old jobs
	// with stale configs aren't reused when the allowlist is updated.
	job := newAllowlistJobFor(nodeName, allowlistCM.ResourceVersion)
	createErr := r.client.Default().CRClient().Create(ctx, job)

	// Handle creation errors (excluding AlreadyExists).
	if createErr != nil && !apierrors.IsAlreadyExists(createErr) {
		klog.Infof("%s failed to create job %s: %v", reconcilerID, job.Name, createErr)
		return reconcile.Result{}, createErr
	}

	// Job created successfully; requeue to check its status later.
	if createErr == nil {
		klog.Infof("%s job %s created", reconcilerID, job.Name)
		return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
	}

	// Job already exists; fetch it to check its status immediately.
	if err := r.client.Default().CRClient().Get(ctx,
		types.NamespacedName{Name: job.Name, Namespace: names.MultusNamespace}, job); err != nil {
		klog.Infof("%s failed to get existing job %s: %v", reconcilerID, job.Name, err)
		return reconcile.Result{}, err
	}

	// Check the Job's terminal conditions: a completed Job is deleted right
	// away, while a failed Job is left in place for inspection (the
	// TTLSecondsAfterFinished setting cleans it up after 24 hours).
	for _, cond := range job.Status.Conditions {
		if cond.Type == batchv1.JobComplete && cond.Status == corev1.ConditionTrue {
			klog.Infof("%s job %s completed successfully, cleaning up", reconcilerID, job.Name)
			err := r.client.Default().CRClient().Delete(ctx, job,
				crclient.PropagationPolicy(metav1.DeletePropagationBackground))
			return reconcile.Result{}, crclient.IgnoreNotFound(err)
		}
		if (cond.Type == batchv1.JobFailureTarget || cond.Type == batchv1.JobFailed) &&
			cond.Status == corev1.ConditionTrue {
			klog.Infof("%s job %s failed: %s (preserved for debugging, TTL cleanup in 24h)",
				reconcilerID, job.Name, cond.Reason)
			return reconcile.Result{}, nil
		}
	}

	klog.Infof("%s job %s is in progress", reconcilerID, job.Name)

	return reconcile.Result{RequeueAfter: 30 * time.Second}, nil
}

// nodePredicate returns a predicate that filters Node events.
// Only node creations trigger reconciliation, which distributes the config to
// new nodes; update, delete, and generic events are ignored.
func nodePredicate() predicate.Predicate {
	return predicate.Funcs{
		CreateFunc: func(_ event.CreateEvent) bool {
			return true
		},
		UpdateFunc: func(_ event.UpdateEvent) bool {
			return false
		},
		DeleteFunc: func(_ event.DeleteEvent) bool {
			return false
		},
		// predicate.Funcs returns true for any callback left unset, so generic
		// events are ignored explicitly to match the contract described above.
		GenericFunc: func(_ event.GenericEvent) bool {
			return false
		},
	}
}

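// newAllowlistJobFor builds a one-shot Job that copies the allowlist
// ConfigMap onto the given node. The ConfigMap ResourceVersion is embedded in
// the Job name (node name truncated to 32 characters, version to 8) so a
// changed allowlist always produces a distinct Job. For example, node
// "worker-0" with ResourceVersion "123456789" yields
// "cni-sysctl-allowlist-worker-0-12345678".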
func newAllowlistJobFor(nodeName string, configMapVersion string) *batchv1.Job {
	jobName := fmt.Sprintf("cni-sysctl-allowlist-%.32s-%.8s", nodeName, configMapVersion)
	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Name:      jobName,
			Namespace: names.MultusNamespace,
			Labels: map[string]string{
				"app":  "cni-sysctl-allowlist-job",
				"node": nodeName,
			},
		},
		Spec: batchv1.JobSpec{
			BackoffLimit:            ptr.To(int32(3)),
			TTLSecondsAfterFinished: ptr.To(int32(allowlistJobTTL)),
			ActiveDeadlineSeconds:   ptr.To(int64(allowlistJobActiveDeadline)),
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: map[string]string{
						"app":  "cni-sysctl-allowlist-job",
						"node": nodeName,
					},
					Annotations: map[string]string{
						"target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
					},
				},
				Spec: corev1.PodSpec{
					RestartPolicy:     corev1.RestartPolicyNever,
					PriorityClassName: "openshift-user-critical",
					// Pin the pod to the node that triggered the reconcile.
					NodeSelector: map[string]string{
						"kubernetes.io/hostname": nodeName,
					},
					Containers: []corev1.Container{
						{
							Name:  "kube-multus-additional-cni-plugins",
							Image: os.Getenv("MULTUS_IMAGE"),
							// Copy the mounted allowlist into the host's CNI tuning directory.
							Command: []string{"/bin/bash", "-c", "cp /entrypoint/allowlist.conf /host/etc/cni/tuning/"},
							Resources: corev1.ResourceRequirements{
								Requests: corev1.ResourceList{
									corev1.ResourceCPU:    resource.MustParse("10m"),
									corev1.ResourceMemory: resource.MustParse("10Mi"),
								},
							},
							// Privileged access is required to write to the host path.
							SecurityContext: &corev1.SecurityContext{
								Privileged: ptr.To(true),
							},
							TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
							VolumeMounts: []corev1.VolumeMount{
								{
									Name:      "cni-sysctl-allowlist",
									MountPath: "/entrypoint",
								},
								{
									Name:      "tuning-conf-dir",
									MountPath: "/host/etc/cni/tuning/",
									ReadOnly:  false,
								},
							},
						},
					},
					Volumes: []corev1.Volume{
						// The allowlist ConfigMap, mounted as the copy source.
						{
							Name: "cni-sysctl-allowlist",
							VolumeSource: corev1.VolumeSource{
								ConfigMap: &corev1.ConfigMapVolumeSource{
									LocalObjectReference: corev1.LocalObjectReference{
										Name: names.AllowlistConfigName,
									},
									DefaultMode: ptr.To(int32(0644)),
								},
							},
						},
						// The node's CNI tuning directory, mounted as the copy destination.
						{
							Name: "tuning-conf-dir",
							VolumeSource: corev1.VolumeSource{
								HostPath: &corev1.HostPathVolumeSource{
									Path: "/etc/cni/tuning/",
									Type: ptr.To(corev1.HostPathDirectoryOrCreate),
								},
							},
						},
					},
				},
			},
		},
	}
}