From e3a83fd768c23cc30bffa25eb0f7b15d4ed68a94 Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Tue, 21 Oct 2025 19:15:22 +0000 Subject: [PATCH] Add e2e tests for CPU startup boost --- .../e2e/v1/admission_controller.go | 118 ++++++++++++++++++ vertical-pod-autoscaler/e2e/v1/common.go | 60 ++++++++- vertical-pod-autoscaler/e2e/v1/full_vpa.go | 117 +++++++++++++++++ vertical-pod-autoscaler/e2e/v1/updater.go | 85 +++++++++++++ 4 files changed, 375 insertions(+), 5 deletions(-) diff --git a/vertical-pod-autoscaler/e2e/v1/admission_controller.go b/vertical-pod-autoscaler/e2e/v1/admission_controller.go index 502a07b81868..c933450c9379 100644 --- a/vertical-pod-autoscaler/e2e/v1/admission_controller.go +++ b/vertical-pod-autoscaler/e2e/v1/admission_controller.go @@ -28,6 +28,7 @@ import ( apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" "k8s.io/kubernetes/test/e2e/framework" framework_deployment "k8s.io/kubernetes/test/e2e/framework/deployment" @@ -961,6 +962,123 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() { }) }) +var _ = AdmissionControllerE2eDescribe("Admission-controller", ginkgo.Label("FG:CPUStartupBoost"), func() { + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.BeforeEach(func() { + checkFeatureGateTestsEnabled(f, features.CPUStartupBoost, true, false) + waitForVpaWebhookRegistration(f) + }) + + ginkgo.It("boosts CPU by factor on pod creation", func() { + initialCPU := ParseQuantityOrDie("100m") + expectedCPU := ParseQuantityOrDie("200m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy (factor)") + containerName := 
GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "15s"). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("100m", "100Mi"). + GetContainerResources(), + ). + Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := startDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + gomega.Expect(pod.Spec.Containers[0].Resources.Limits.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + }) + + ginkgo.It("boosts CPU by quantity on pod creation", func() { + initialCPU := ParseQuantityOrDie("100m") + boostCPUQuantity := ParseQuantityOrDie("500m") + expectedCPU := ParseQuantityOrDie("600m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy (quantity)") + containerName := GetHamsterContainerNameByIndex(0) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.QuantityStartupBoostType, nil, &boostCPUQuantity, "15s"). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("100m", "100Mi"). + GetContainerResources(), + ). 
+ Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := startDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + gomega.Expect(pod.Spec.Containers[0].Resources.Limits.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + }) + + ginkgo.It("boosts CPU on pod creation when VPA update mode is Off", func() { + initialCPU := ParseQuantityOrDie("100m") + expectedCPU := ParseQuantityOrDie("200m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with updateMode Off and a startup boost policy") + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeOff). // VPA is off, but boost should still work + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "15s"). + Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := startDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(expectedCPU)).To(gomega.Equal(0)) + }) + + ginkgo.It("doesn't boost CPU on pod creation when scaling mode is Off", func() { + initialCPU := ParseQuantityOrDie("100m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy and scaling mode Off") + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). 
+ WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "15s"). + WithScalingMode(containerName, vpa_types.ContainerScalingModeOff). + Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is NOT boosted") + podList := startDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(initialCPU)).To(gomega.Equal(0)) + }) +}) + func startDeploymentPods(f *framework.Framework, deployment *appsv1.Deployment) *apiv1.PodList { // Apiserver watch can lag depending on cached object count and apiserver resource usage. // We assume that watch can lag up to 5 seconds. diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go index ce5e8e76074e..1d776911b3b6 100644 --- a/vertical-pod-autoscaler/e2e/v1/common.go +++ b/vertical-pod-autoscaler/e2e/v1/common.go @@ -39,6 +39,7 @@ import ( vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" clientset "k8s.io/client-go/kubernetes" + "k8s.io/component-base/featuregate" "k8s.io/kubernetes/test/e2e/framework" framework_deployment "k8s.io/kubernetes/test/e2e/framework/deployment" ) @@ -359,14 +360,30 @@ func PatchVpaRecommendation(f *framework.Framework, vpa *vpa_types.VerticalPodAu // AnnotatePod adds annotation for an existing pod. 
func AnnotatePod(f *framework.Framework, podName, annotationName, annotationValue string) { - bytes, err := json.Marshal([]patchRecord{{ + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), podName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to get pod.") + + patches := []patchRecord{} + if pod.Annotations == nil { + patches = append(patches, patchRecord{ + Op: "add", + Path: "/metadata/annotations", + Value: make(map[string]string), + }) + } + + patches = append(patches, patchRecord{ Op: "add", - Path: fmt.Sprintf("/metadata/annotations/%v", annotationName), + Path: fmt.Sprintf("/metadata/annotations/%s", annotationName), Value: annotationValue, - }}) - pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Patch(context.TODO(), podName, types.JSONPatchType, bytes, metav1.PatchOptions{}) + }) + + bytes, err := json.Marshal(patches) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + patchedPod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Patch(context.TODO(), podName, types.JSONPatchType, bytes, metav1.PatchOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to patch pod.") - gomega.Expect(pod.Annotations[annotationName]).To(gomega.Equal(annotationValue)) + gomega.Expect(patchedPod.Annotations[annotationName]).To(gomega.Equal(annotationValue)) } // ParseQuantityOrDie parses quantity from string and dies with an error if @@ -654,3 +671,36 @@ func anyContainsSubstring(arr []string, substr string) bool { } return false } + +// checkFeatureGateTestsEnabled checks that the feature gates required for the +// given VPA feature are enabled in the cluster. +// Use this in a "beforeEach" call before any suites that use a featuregate.
+func checkFeatureGateTestsEnabled(f *framework.Framework, feature featuregate.Feature, checkAdmission, checkUpdater bool) { + ginkgo.By(fmt.Sprintf("Checking %s cluster feature gate is on", feature)) + + if checkUpdater { + ginkgo.By(fmt.Sprintf("Checking %s VPA feature gate is enabled for updater", feature)) + + deploy, err := f.ClientSet.AppsV1().Deployments(VpaNamespace).Get(context.TODO(), "vpa-updater", metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(deploy.Spec.Template.Spec.Containers).To(gomega.HaveLen(1)) + vpaUpdaterPod := deploy.Spec.Template.Spec.Containers[0] + gomega.Expect(vpaUpdaterPod.Name).To(gomega.Equal("updater")) + if !anyContainsSubstring(vpaUpdaterPod.Args, fmt.Sprintf("%s=true", string(feature))) { + ginkgo.Skip(fmt.Sprintf("Skipping suite: %s feature gate is not enabled for the VPA updater", feature)) + } + } + + if checkAdmission { + ginkgo.By(fmt.Sprintf("Checking %s VPA feature gate is enabled for admission controller", feature)) + + deploy, err := f.ClientSet.AppsV1().Deployments(VpaNamespace).Get(context.TODO(), "vpa-admission-controller", metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(deploy.Spec.Template.Spec.Containers).To(gomega.HaveLen(1)) + vpaAdmissionPod := deploy.Spec.Template.Spec.Containers[0] + gomega.Expect(vpaAdmissionPod.Name).To(gomega.Equal("admission-controller")) + if !anyContainsSubstring(vpaAdmissionPod.Args, fmt.Sprintf("%s=true", string(feature))) { + ginkgo.Skip(fmt.Sprintf("Skipping suite: %s feature gate is not enabled for VPA admission controller", feature)) + } + } +} diff --git a/vertical-pod-autoscaler/e2e/v1/full_vpa.go b/vertical-pod-autoscaler/e2e/v1/full_vpa.go index ec1467f58a53..21f72fb1b2f9 100644 --- a/vertical-pod-autoscaler/e2e/v1/full_vpa.go +++ b/vertical-pod-autoscaler/e2e/v1/full_vpa.go @@ -27,6 +27,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" vpa_types 
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" "k8s.io/kubernetes/test/e2e/framework" podsecurity "k8s.io/pod-security-admission/api" @@ -347,6 +348,122 @@ var _ = FullVpaE2eDescribe("Pods under VPA with non-recognized recommender expli }) }) +var _ = FullVpaE2eDescribe("Pods under VPA with CPUStartupBoost", ginkgo.Label("FG:CPUStartupBoost"), func() { + var ( + rc *ResourceConsumer + ) + replicas := 3 + + ginkgo.AfterEach(func() { + rc.CleanUp() + }) + + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.Describe("have CPU startup boost recommendation applied", func() { + ginkgo.BeforeEach(func() { + checkFeatureGateTestsEnabled(f, features.CPUStartupBoost, true, true) + waitForVpaWebhookRegistration(f) + }) + + ginkgo.It("to all containers of a pod", func() { + ns := f.Namespace.Name + ginkgo.By("Setting up a VPA CRD with CPUStartupBoost") + targetRef := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster", + } + + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(100) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(targetRef). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "10s"). 
+ Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Setting up a hamster deployment") + rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, + replicas, + 1, /*initCPUTotal*/ + 10, /*initMemoryTotal*/ + 1, /*initCustomMetric*/ + initialCPU, /*cpuRequest*/ + initialMemory, /*memRequest*/ + f.ClientSet, + f.ScalesGetter) + + // Pods should be created with boosted CPU (10m * 100 = 1000m) + err := waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie("900m"), ParseQuantityOrDie("1100m")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Pods should be scaled back down in-place after they become Ready and + // StartupBoost.CPU.Duration has elapsed + err = waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("to a subset of containers in a pod", func() { + ns := f.Namespace.Name + + ginkgo.By("Setting up a VPA CRD with CPUStartupBoost") + targetRef := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster", + } + + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(100) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(targetRef). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "10s"). 
+ Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By("Setting up a hamster deployment") + rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, + replicas, + 1, /*initCPUTotal*/ + 10, /*initMemoryTotal*/ + 1, /*initCustomMetric*/ + initialCPU, /*cpuRequest*/ + initialMemory, /*memRequest*/ + f.ClientSet, + f.ScalesGetter) + + // Pods should be created with boosted CPU (10m * 100 = 1000m) + err := waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie("900m"), ParseQuantityOrDie("1100m")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Pods should be scaled back down in-place after they become Ready and + // StartupBoost.CPU.Duration has elapsed + err = waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + +}) + var _ = FullVpaE2eDescribe("OOMing pods under VPA", func() { const replicas = 3 diff --git a/vertical-pod-autoscaler/e2e/v1/updater.go b/vertical-pod-autoscaler/e2e/v1/updater.go index a72cdf6b1eba..e8b0c1745e40 100644 --- a/vertical-pod-autoscaler/e2e/v1/updater.go +++ b/vertical-pod-autoscaler/e2e/v1/updater.go @@ -25,6 +25,8 @@ import ( apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/status" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" "k8s.io/kubernetes/test/e2e/framework" @@ -211,6 +213,89 @@ var _ = UpdaterE2eDescribe("Updater", ginkgo.Label("FG:InPlaceOrRecreate"), func }) }) +var _ = UpdaterE2eDescribe("Updater", 
ginkgo.Label("FG:CPUStartupBoost"), func() { + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.BeforeEach(func() { + checkFeatureGateTestsEnabled(f, features.CPUStartupBoost, false, true) + }) + + ginkgo.It("Unboost pods when they become Ready", func() { + const statusUpdateInterval = 10 * time.Second + + ginkgo.By("Setting up the Admission Controller status") + stopCh := make(chan struct{}) + statusUpdater := status.NewUpdater( + f.ClientSet, + status.AdmissionControllerStatusName, + status.AdmissionControllerStatusNamespace, + statusUpdateInterval, + "e2e test", + ) + defer func() { + // Schedule a cleanup of the Admission Controller status. + // Status is created outside the test namespace. + ginkgo.By("Deleting the Admission Controller status") + close(stopCh) + err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace). + Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }() + statusUpdater.Run(stopCh) + + podList := setupPodsForCPUBoost(f, "100m", "100Mi") + initialPods := podList.DeepCopy() + + ginkgo.By("Waiting for pods to be in-place updated") + err := WaitForPodsUpdatedWithoutEviction(f, initialPods) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + +}) + +func setupPodsForCPUBoost(f *framework.Framework, hamsterCPU, hamsterMemory string) *apiv1.PodList { + controller := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster-deployment", + } + ginkgo.By(fmt.Sprintf("Setting up a hamster %v", controller.Kind)) + // Create pods with boosted CPU, which is 2x the target recommendation + boostedCPU := "200m" + setupHamsterController(f, controller.Kind, boostedCPU, hamsterMemory, defaultHamsterReplicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + 
ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(controller). + WithUpdateMode(vpa_types.UpdateModeAuto). + WithContainer(containerName). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, &factor, nil, "1s"). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(hamsterCPU, hamsterMemory). + GetContainerResources(), + ). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By("Annotating pods with boost annotation") + for _, pod := range podList.Items { + original, err := annotations.GetOriginalResourcesAnnotationValue(&pod.Spec.Containers[0]) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + AnnotatePod(f, pod.Name, annotations.StartupCPUBoostAnnotation, original) + } + return podList +} + func setupPodsForUpscalingEviction(f *framework.Framework) *apiv1.PodList { return setupPodsForEviction(f, "100m", "100Mi", nil) }