Add an opt-in custom kubelet configuration to the CRI resource-pressure benchmark.

  * cri.py: override_config_clusterloader2 gains use_custom_kubelet (default
    False); when it is set, MEMORY_SCALE_FACTOR is 1.00 instead of 0.95.
  * k8s-resource-pressure.yml: new use_custom_kubelet / custom_kubelet_config
    pipeline parameters, forwarded to every matrix entry.
  * chart/: Helm chart with a privileged DaemonSet that rewrites flags in
    /etc/default/kubelet on nodes labelled cri-resource-consume=true and then
    restarts kubelet (best effort; a failed restart is logged as a warning).
  * validate-resources.yml: converts the config JSON into kubelet flags with jq,
    installs the chart and waits for the DaemonSet rollout. The step only runs
    when use_custom_kubelet is true.

diff --git a/modules/python/clusterloader2/cri/cri.py b/modules/python/clusterloader2/cri/cri.py
index 889d04c95d..6972a73b47 100644
--- a/modules/python/clusterloader2/cri/cri.py
+++ b/modules/python/clusterloader2/cri/cri.py
@@ -12,14 +12,15 @@
 setup_logging()
 logger = get_logger(__name__)
 
-MEMORY_SCALE_FACTOR = 0.95 # 95% of the total allocatable memory to account for error margin
-
 # TODO: Refactor to use a config dataclass to reduce number of arguments
 # Reference: modules/python/clusterloader2/job_controller/job_controller.py
 def override_config_clusterloader2(
     node_count, node_per_step, max_pods, repeats, operation_timeout,
     load_type, scale_enabled, pod_startup_latency_threshold, provider,
-    registry_endpoint, os_type, scrape_kubelets, scrape_containerd, containerd_scrape_interval, host_network, override_file):
+    registry_endpoint, os_type, scrape_kubelets, scrape_containerd, containerd_scrape_interval, host_network, override_file, use_custom_kubelet=False):
+    MEMORY_SCALE_FACTOR = 0.95 # 95% of the total allocatable memory to account for error margin
+    if use_custom_kubelet:
+        MEMORY_SCALE_FACTOR = 1.00 # Allow full memory access for load testing
     client = KubernetesClient(os.path.expanduser("~/.kube/config"))
     nodes = client.get_nodes(label_selector="cri-resource-consume=true")
     if len(nodes) == 0:
diff --git a/pipelines/perf-eval/CRI Benchmark/k8s-resource-pressure.yml b/pipelines/perf-eval/CRI Benchmark/k8s-resource-pressure.yml
index 1b601dc09f..4abf0c2251 100644
--- a/pipelines/perf-eval/CRI Benchmark/k8s-resource-pressure.yml
+++ b/pipelines/perf-eval/CRI Benchmark/k8s-resource-pressure.yml
@@ -1,8 +1,34 @@
 trigger: none
 
+parameters:
+  - name: use_custom_kubelet
+    displayName: "Use Custom Kubelet Flags?"
+    type: boolean
+    default: true
+  - name: custom_kubelet_config
+    displayName: "Custom Kubelet Config Object"
+    type: object # The parameter type is object
+    default: # Default value is an object with nested properties
+      eviction-hard:
+        memory.available: "200Mi"
+        nodefs.available: "10%"
+        nodefs.inodesFree: "5%"
+        pid.available: "2000"
+      eviction-soft:
+        memory.available: "400Mi"
+        nodefs.available: "15%"
+        nodefs.inodesFree: "10%"
+        pid.available: "3000"
+      eviction-soft-grace-period:
+        memory.available: "1m"
+        nodefs.available: "1m"
+        nodefs.inodesFree: "1m"
+        pid.available: "1m"
+
 variables:
   SCENARIO_TYPE: perf-eval
   SCENARIO_NAME: k8s-node-stress
+  custom_kubelet_config_json: ${{ convertToJson(parameters.custom_kubelet_config) }}
 
 stages:
   - stage: azure_westus2_stress_baseline_1_34
@@ -29,6 +55,8 @@ stages:
               pod_startup_latency_threshold: 23s
               kubernetes_version: "1.34"
               scrape_kubelets: True
+              use_custom_kubelet: ${{ parameters.use_custom_kubelet }}
+              custom_kubelet_config: ${{ variables.custom_kubelet_config_json }}
             n1-p60-memory-managed:
               node_count: 1
               max_pods: 60
@@ -39,6 +67,8 @@ stages:
               kubernetes_version: "1.34"
               k8s_os_disk_type: Managed
               scrape_kubelets: True
+              use_custom_kubelet: ${{ parameters.use_custom_kubelet }}
+              custom_kubelet_config: ${{ variables.custom_kubelet_config_json }}
             n1-p60-cpu-ephemeral:
               node_count: 1
               max_pods: 60
@@ -48,6 +78,8 @@ stages:
               pod_startup_latency_threshold: 23s
               kubernetes_version: "1.34"
               scrape_kubelets: True
+              use_custom_kubelet: ${{ parameters.use_custom_kubelet }}
+              custom_kubelet_config: ${{ variables.custom_kubelet_config_json }}
             n1-p60-cpu-managed:
               node_count: 1
               max_pods: 60
@@ -58,6 +90,8 @@ stages:
               kubernetes_version: "1.34"
               k8s_os_disk_type: Managed
               scrape_kubelets: True
+              use_custom_kubelet: ${{ parameters.use_custom_kubelet }}
+              custom_kubelet_config: ${{ variables.custom_kubelet_config_json }}
           max_parallel: 1
           timeout_in_minutes: 120
           credential_type: service_connection
diff --git a/steps/topology/k8s-resource-pressure/chart/Chart.yaml b/steps/topology/k8s-resource-pressure/chart/Chart.yaml
new file mode 100644
index 0000000000..b6fed55a84
--- /dev/null
+++ b/steps/topology/k8s-resource-pressure/chart/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2
+name: kubelet-config-updater
+description: Deploy a DaemonSet to update kubelet flags on nodes
+type: application
+version: 0.1.0
+appVersion: "1.0"
diff --git a/steps/topology/k8s-resource-pressure/chart/templates/daemonset.yaml b/steps/topology/k8s-resource-pressure/chart/templates/daemonset.yaml
new file mode 100644
index 0000000000..290cd2781d
--- /dev/null
+++ b/steps/topology/k8s-resource-pressure/chart/templates/daemonset.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: kubelet-config-updater
+  namespace: kube-system
+  labels:
+    app: kubelet-config-updater
+spec:
+  selector:
+    matchLabels:
+      app: kubelet-config-updater
+  template:
+    metadata:
+      labels:
+        app: kubelet-config-updater
+    spec:
+      hostPID: true
+      nodeSelector:
+        cri-resource-consume: "true"
+      tolerations:
+      - key: cri-resource-consume
+        operator: "Equal"
+        value: "true"
+        effect: "NoSchedule"
+      - key: cri-resource-consume
+        operator: "Equal"
+        value: "true"
+        effect: "NoExecute"
+      containers:
+      - name: kubelet-config-updater
+        image: ghcr.io/containerd/busybox:1.36
+        securityContext:
+          privileged: true
+        command:
+        - /bin/sh
+        - -c
+        - |
+          echo "Updating kubelet configuration..."
+          # Space-separated --flag=value pairs taken from the chart value kubeletFlags.
+          export CUSTOM_EVICTION_FLAGS="{{ .Values.kubeletFlags }}"
+          # Anchor flag: a flag that is not present yet is appended right after this one.
+          export kubelet_flags_eof_key="--node-ip"
+          for kvp in $CUSTOM_EVICTION_FLAGS; do
+            key=${kvp%%=*}
+            value=${kvp#*=}
+            # Backslash-escape regex metacharacters and the sed "/" delimiter.
+            esc_key=$(printf '%s' "${key}" | sed -e 's/[][\/.^$*+?()|]/\\&/g')
+            esc_value=$(printf '%s' "${value}" | sed -e 's/[][\/.^$*+?()|]/\\&/g')
+            echo "Processing key: ${esc_key} with value: ${esc_value}"
+            if grep -q -E "(^|[[:space:]])${esc_key}=[^[:space:]]+" /etc/default/kubelet; then
+              echo "Found ${esc_key} in /etc/default/kubelet"
+              sed -i -E "s/(^|[[:space:]])(${esc_key})=[^[:space:]]*/\1\2=${esc_value}/g" "/etc/default/kubelet"
+            else
+              echo "Adding ${esc_key}=${esc_value} to /etc/default/kubelet"
+              repl=$(printf ' %s=%s' "${esc_key}" "${esc_value}")
+              sed -i -E "s/(${kubelet_flags_eof_key}=[^[:space:]]+)/\1${repl}/g" "/etc/default/kubelet"
+              export kubelet_flags_eof_key="${esc_key}"
+            fi
+          done
+          echo "Checking Updated kubelet configuration:"
+          cat /etc/default/kubelet
+          echo "Restarting kubelet..."
+          # Run systemctl in the mount namespace of host PID 1 (reachable because hostPID is true).
+          if ! nsenter --mount=/proc/1/ns/mnt -- systemctl restart kubelet; then
+            echo "WARNING: failed to restart kubelet; the updated flags may not be in effect" >&2
+          fi
+          echo "Done. Sleeping indefinitely to keep the pod running."
+          sleep infinity
+        volumeMounts:
+        - name: systemd
+          mountPath: /run/systemd
+        - name: kubelet-config
+          mountPath: /etc/default
+      volumes:
+      - name: kubelet-config
+        hostPath:
+          path: /etc/default
+          type: Directory
+      - name: systemd
+        hostPath:
+          path: /run/systemd
+      restartPolicy: Always
diff --git a/steps/topology/k8s-resource-pressure/chart/values.yaml b/steps/topology/k8s-resource-pressure/chart/values.yaml
new file mode 100644
index 0000000000..b85302e7d9
--- /dev/null
+++ b/steps/topology/k8s-resource-pressure/chart/values.yaml
@@ -0,0 +1,3 @@
+# Provide a single string with kubelet flags to add, example:
+# "--eviction-hard=memory.available<100Mi --system-reserved=cpu=100m,memory=200Mi"
+kubeletFlags: ""
diff --git a/steps/topology/k8s-resource-pressure/validate-resources.yml b/steps/topology/k8s-resource-pressure/validate-resources.yml
index e0efb27b4f..4a90926d5f 100644
--- a/steps/topology/k8s-resource-pressure/validate-resources.yml
+++ b/steps/topology/k8s-resource-pressure/validate-resources.yml
@@ -14,3 +14,43 @@ steps:
 - template: /steps/engine/clusterloader2/large-cluster/validate.yml
   parameters:
     desired_nodes: 3
+- bash: |
+    set -euo pipefail
+    set -x
+
+    # Render the JSON object as "--key=value" flags. Nested objects become comma-separated
+    # pairs joined with "<" for eviction-hard / eviction-soft and with "=" for any other key.
+    flags_string=$(echo "$CUSTOM_EVICTION_FLAGS" | jq -r '
+      def format_value(k; v):
+        if (v|type) == "object" then
+          (if (k == "eviction-hard") or (k == "eviction-soft") then "<"
+           else "=" end) as $delim
+          | (v | to_entries | map(.key + $delim + (.value|tostring)) | join(","))
+        elif (v|type) == "array" then
+          (v | map(tostring) | join(","))
+        else
+          (v | tostring)
+        end;
+
+      to_entries
+      | map("--" + .key + "=" + (format_value(.key; .value)))
+      | join(" ")
+    ')
+
+    printf '%s' "$flags_string" > "kubelet_flags.txt"
+
+    helm upgrade --install kubelet-config-updater ./steps/topology/k8s-resource-pressure/chart \
+      --namespace kube-system --create-namespace \
+      --set-file kubeletFlags="kubelet_flags.txt"
+
+    # Wait for the DaemonSet pods instead of sleeping for a fixed interval.
+    kubectl rollout status daemonset/kubelet-config-updater --namespace kube-system --timeout=120s
+
+    kubectl get pods -A -o wide
+
+  env:
+    CUSTOM_EVICTION_FLAGS: $(CUSTOM_KUBELET_CONFIG)
+
+  # Skip when the flag is false or the variable is not defined for this job.
+  condition: and(succeeded(), eq(variables['use_custom_kubelet'], 'True'))
+  displayName: "Apply Custom Kubelet Config"