
chore(deps): bump k8s.io/client-go from 0.35.0-alpha.0 to 0.35.0-alpha.2 #336

name: E2E Tests (In-Cluster Runner)
on:
workflow_dispatch:
inputs:
run_benchmarks:
description: 'Run performance benchmarks'
required: false
type: boolean
default: false
schedule:
# Run nightly at 2 AM UTC
- cron: '0 2 * * *'
pull_request:
types: [labeled]
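# Note: with `types: [labeled]`, github.event.label.name is the label that was just added,
# so the job-level `if` below only fires when a label whose name contains 'run-e2e' is applied to the PR.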
jobs:
e2e-tests:
# Use self-hosted runner with 'ibm-e2e' label
runs-on: [self-hosted, ibm-e2e]
if: |
github.event_name == 'workflow_dispatch' ||
github.event_name == 'schedule' ||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'run-e2e'))
# Prevent concurrent e2e runs
concurrency:
group: e2e-tests
cancel-in-progress: false
timeout-minutes: 210
container:
# Run in a container with necessary tools
image: golang:1.24.6
options: --user 0
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install dependencies
run: |
apt-get update && apt-get install -y curl jq
# Install kubectl
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x kubectl
mv kubectl /usr/local/bin/
# Install IBM Cloud CLI
curl -fsSL https://clis.cloud.ibm.com/install/linux | sh
ibmcloud plugin install vpc-infrastructure
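# The vpc-infrastructure plugin provides the `ibmcloud is ...` commands used by the cleanup step at the end of this job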
- name: Setup kubeconfig
env:
KUBECONFIG_CONTENT: ${{ secrets.KUBECONFIG }}
run: |
# Create kubeconfig from secret
printf '%s' "$KUBECONFIG_CONTENT" | base64 -d > /tmp/kubeconfig
chmod 600 /tmp/kubeconfig
export KUBECONFIG=/tmp/kubeconfig
# Confirm the kubectl client is installed; actual cluster access is verified in the next step
kubectl version --client
- name: Verify cluster access
env:
KUBECONFIG: /tmp/kubeconfig
run: |
# Verify cluster access with provided kubeconfig
kubectl cluster-info
kubectl auth can-i create nodeclaims --all-namespaces
kubectl auth can-i create nodepools --all-namespaces
kubectl auth can-i create ibmnodeclasses --all-namespaces
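# Optional extra check (the deploy and cleanup steps below assume the operator runs in the "karpenter" namespace):
# kubectl get namespace karpenter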
- name: Configure IBM Cloud CLI
env:
IBMCLOUD_API_KEY: ${{ secrets.IBMCLOUD_API_KEY }}
IBMCLOUD_REGION: ${{ secrets.IBMCLOUD_REGION }}
run: |
ibmcloud login --apikey "$IBMCLOUD_API_KEY" -r "$IBMCLOUD_REGION"
- name: Deploy latest version
env:
KUBECONFIG: /tmp/kubeconfig
run: |
# Install or update Karpenter CRDs
kubectl apply -f charts/crds/
# Restart operator pods to pull latest upstream image tag
kubectl rollout restart deployment/karpenter-karpenter-ibm -n karpenter
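# Optionally wait for the restarted pods to become ready before the tests start,
# e.g. (same deployment and namespace as above):
# kubectl rollout status deployment/karpenter-karpenter-ibm -n karpenter --timeout=5m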
- name: Pre-test cleanup
env:
KUBECONFIG: /tmp/kubeconfig
run: |
echo "🧹 Cleaning up any existing e2e test resources..."
# Clean up any leftover resources from previous runs
kubectl delete pods -l test=e2e --all-namespaces --timeout=300s || true
kubectl delete deployments -l test=e2e --all-namespaces --timeout=300s || true
kubectl delete nodeclaims -l test=e2e --timeout=300s || true
kubectl delete nodepools -l test=e2e --timeout=300s || true
kubectl delete ibmnodeclasses -l test=e2e --timeout=300s || true
# Wait for cluster stabilization
echo "⏳ Waiting for cluster stabilization..."
# Wait for no pending e2e pods
for i in {1..30}; do
pending_pods=$(kubectl get pods -l test=e2e --all-namespaces --field-selector=status.phase=Pending --no-headers 2>/dev/null | wc -l)
if [ "$pending_pods" -eq 0 ]; then
echo "✅ No pending e2e pods found"
break
fi
echo "⏳ Still have $pending_pods pending e2e pods, waiting..."
sleep 10
done
# Wait for no disrupted nodes
for i in {1..30}; do
disrupted_nodes=$(kubectl get nodes --no-headers -o custom-columns="NAME:.metadata.name,TAINTS:.spec.taints[*].key" 2>/dev/null | grep -c "karpenter.sh/disrupted" || true)
if [ "$disrupted_nodes" -eq 0 ]; then
echo "✅ No disrupted nodes found"
break
fi
echo "⏳ Still have $disrupted_nodes disrupted nodes, waiting..."
sleep 10
done
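# Each wait loop above gives the cluster up to 5 minutes (30 x 10s) to settle before moving on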
# Brief pause for final cleanup
sleep 30
echo "✅ Pre-test cleanup completed"
- name: Run E2E tests (Sequential)
env:
RUN_E2E_TESTS: "true"
IBMCLOUD_API_KEY: ${{ secrets.IBMCLOUD_API_KEY }}
VPC_API_KEY: ${{ secrets.IBMCLOUD_API_KEY }}
IBMCLOUD_REGION: ${{ secrets.IBMCLOUD_REGION }}
TEST_VPC_ID: ${{ secrets.E2E_TEST_VPC_ID }}
TEST_SUBNET_ID: ${{ secrets.E2E_TEST_SUBNET_ID }}
TEST_IMAGE_ID: ${{ secrets.E2E_TEST_IMAGE_ID }}
TEST_ZONE: ${{ secrets.E2E_TEST_ZONE }}
TEST_SECURITY_GROUP_ID: ${{ secrets.E2E_TEST_SECURITY_GROUP_ID }}
VPC_URL: ${{ secrets.VPC_URL }}
KUBERNETES_API_SERVER_ENDPOINT: ${{ secrets.KUBERNETES_API_SERVER_ENDPOINT }}
IBM_RESOURCE_GROUP_ID: ${{ secrets.IBM_RESOURCE_GROUP_ID }}
IBM_SSH_KEY_ID: ${{ secrets.IBM_SSH_KEY_ID }}
RUN_E2E_BENCHMARKS: ${{ inputs.run_benchmarks }}
# Use the kubeconfig we set up
KUBECONFIG: /tmp/kubeconfig
# Configure e2e test behavior
E2E_SEQUENTIAL: "true"
E2E_CLEANUP_TIMEOUT: "300s"
E2E_STABILIZATION_WAIT: "60s"
run: |
echo "🚀 Starting E2E test suite..."
# Define test groups
# Core functionality tests from basic_workflow_test.go
core_tests="TestE2EFullWorkflow TestE2ENodePoolInstanceTypeSelection TestE2EInstanceTypeSelection TestE2EDriftStability"
# NodeClass validation tests from validation_test.go
validation_tests="TestE2ENodeClassValidation TestE2EValidNodeClassCreation TestE2ENodeClassWithMissingFields"
# Block device mapping tests from block_device_test.go
block_device_tests="TestE2EBlockDeviceMapping TestE2EBlockDeviceMappingValidation"
# Scheduling constraint tests from scheduling_test.go and e2e_taints_test.go
scheduling_tests="TestE2EPodDisruptionBudget TestE2EConsolidationWithPDB TestE2EPodAntiAffinity TestE2ENodeAffinity TestE2EStartupTaints TestE2EStartupTaintsRemoval TestE2ETaintsBasicScheduling TestE2ETaintValues TestE2ETaintSync TestE2EUnregisteredTaintHandling"
# UserData feature tests from userdata_test.go
userdata_tests="TestE2EUserDataAppend TestE2EStandardBootstrap"
# Image selector tests from image_selector_test.go
image_selector_tests="TestE2EImageSelector"
# Multi-zone tests from multizone_test.go
multizone_tests="TestE2EMultiZoneDistribution TestE2EZoneAntiAffinity TestE2ETopologySpreadConstraints TestE2EPlacementStrategyValidation TestE2EZoneFailover"
# Cleanup tests from cleanup_test.go
cleanup_tests="TestE2ECleanupNodePoolDeletion TestE2ECleanupNodeClassDeletion TestE2ECleanupOrphanedResources TestE2ECleanupIBMCloudResources"
# Combine all tests
all_tests="$core_tests $validation_tests $block_device_tests $scheduling_tests $userdata_tests $image_selector_tests $multizone_tests $cleanup_tests"
test_failed="false"
passed_tests=0
failed_tests=0
total_tests=$(echo $all_tests | wc -w)
echo "📋 Test Suite Summary:"
echo " Core Tests: $(echo $core_tests | wc -w)"
echo " Validation Tests: $(echo $validation_tests | wc -w)"
echo " Block Device Tests: $(echo $block_device_tests | wc -w)"
echo " Scheduling Tests: $(echo $scheduling_tests | wc -w)"
echo " UserData Tests: $(echo $userdata_tests | wc -w)"
echo " Image Selector Tests: $(echo $image_selector_tests | wc -w)"
echo " Multi-Zone Tests: $(echo $multizone_tests | wc -w)"
echo " Cleanup Tests: $(echo $cleanup_tests | wc -w)"
echo " Total Tests: $total_tests"
echo ""
# Run each test individually with cleanup between
for test in $all_tests; do
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "🧪 Running test: $test"
echo "Progress: $((passed_tests + failed_tests + 1))/$total_tests"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# Set appropriate timeout based on test type
timeout="20m"
case "$test" in
"TestE2EDriftStability")
timeout="30m" # Drift test needs more time for monitoring
;;
"TestE2EMultiZone"*|"TestE2EZone"*|"TestE2ETopology"*|"TestE2EPlacementStrategy"*)
timeout="25m" # Multi-zone tests need extra time for cross-zone provisioning
;;
"TestE2ECleanup"*)
timeout="15m" # Cleanup tests are typically faster
;;
"TestE2EValidation"*|"TestE2ENodeClass"*)
timeout="10m" # Validation tests are quick
;;
*)
timeout="20m" # Default timeout for other tests
;;
esac
# Create test-specific log file to capture all output
test_log="test-artifacts/${test}-$(date +%s).log"
mkdir -p test-artifacts
# Run test with enhanced logging and crash recovery
set +e # Don't exit on failure
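# -run "^$test$" anchors the regex so only this exact test runs, -count=1 bypasses Go's test result cache,
# and the outer `timeout` is a last-resort kill in case go test itself hangs past its own -timeout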
timeout $timeout go test -tags=e2e -v -timeout $timeout ./test/e2e -run "^$test$" -count=1 2>&1 | tee "$test_log"
test_exit_code=$?
set -e # Re-enable exit on failure
if [ $test_exit_code -eq 0 ]; then
echo "✅ Test $test passed"
passed_tests=$((passed_tests + 1))
else
echo "❌ Test $test failed (exit code: $test_exit_code)"
failed_tests=$((failed_tests + 1))
# Enhanced debug information on failure
echo "📊 Debug information for failed test $test:"
echo " Exit code: $test_exit_code"
echo " Log file: $test_log"
# Collect system state
kubectl get nodes --no-headers | wc -l | xargs echo " Total nodes:"
kubectl get nodeclaims --no-headers 2>/dev/null | wc -l | xargs echo " Total nodeclaims:" || echo " Total nodeclaims: 0"
kubectl get pods -l test=e2e --all-namespaces --no-headers 2>/dev/null | wc -l | xargs echo " Total e2e pods:" || echo " Total e2e pods: 0"
# Collect Karpenter pod status
echo " Karpenter pod status:"
kubectl get pods -n karpenter -l app.kubernetes.io/name=karpenter --no-headers 2>/dev/null || echo " No Karpenter pods found"
# Collect recent events (errors and warnings)
echo " Recent warning events:"
kubectl get events -A --field-selector type=Warning --sort-by='.lastTimestamp' 2>/dev/null | tail -5 || echo " No warning events"
# Check for panic or crash indicators in test log
if grep -i "panic\|fatal\|segmentation\|killed" "$test_log" >/dev/null 2>&1; then
echo " ⚠️ Test appears to have crashed (panic/fatal error detected)"
fi
# Collect Karpenter logs immediately after failure
kubectl logs -n karpenter -l app.kubernetes.io/name=karpenter --tail=100 > "test-artifacts/karpenter-logs-${test}-$(date +%s).txt" 2>/dev/null || echo " Failed to collect Karpenter logs"
test_failed="true"
fi
# Inter-test cleanup and stabilization
echo "🧹 Cleaning up after test: $test"
# Delete test-specific resources
kubectl delete pods -l test=e2e --all-namespaces --timeout=300s || true
kubectl delete deployments -l test=e2e --all-namespaces --timeout=300s || true
kubectl delete nodeclaims -l test=e2e --timeout=300s || true
kubectl delete nodepools -l test=e2e --timeout=300s || true
kubectl delete ibmnodeclasses -l test=e2e --timeout=300s || true
# Wait for cleanup to complete
echo "⏳ Waiting for cleanup to complete..."
# Extended cleanup wait for drift stability test due to NodeClaim deletion timeouts
if [ "$test" = "TestE2EDriftStability" ]; then
echo "⏳ Extended cleanup wait for drift stability test..."
sleep 120 # 2 minutes for NodeClaim finalizers to complete
else
sleep 30 # Standard cleanup wait
fi
# Check cluster health before next test
# Count nodes whose STATUS is Ready (word match so NotReady nodes are excluded)
kubectl get nodes --no-headers | grep -cw Ready | xargs echo "Ready nodes:"
kubectl get nodeclaims --no-headers | grep -c True | xargs echo "Ready nodeclaims:" || echo "Ready nodeclaims: 0"
echo "✅ Completed test: $test"
echo ""
done
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "📊 Test Suite Results:"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo " Total Tests: $total_tests"
echo " ✅ Passed: $passed_tests"
echo " ❌ Failed: $failed_tests"
echo " Success Rate: $((passed_tests * 100 / total_tests))%"
echo ""
# Check if any test failed
if [ "$test_failed" = "true" ]; then
echo "❌ Test suite failed with $failed_tests failures"
exit 1
fi
echo "✅ All E2E tests completed successfully!"
# Run benchmarks if requested
if [ "$RUN_E2E_BENCHMARKS" = "true" ]; then
echo "📊 Running performance benchmarks..."
go test -tags=e2e -v -timeout 30m ./test/e2e/... -run=^$ -bench=.
fi
- name: Collect test artifacts
if: always()
env:
KUBECONFIG: /tmp/kubeconfig
run: |
echo "📦 Collecting comprehensive test artifacts..."
mkdir -p test-artifacts
# Collect Karpenter logs with different tail sizes for completeness
echo " Collecting Karpenter logs..."
kubectl logs -n karpenter -l app.kubernetes.io/name=karpenter --tail=2000 > test-artifacts/karpenter-logs.txt 2>/dev/null || echo "Failed to collect current Karpenter logs" > test-artifacts/karpenter-logs.txt
kubectl logs -n karpenter -l app.kubernetes.io/name=karpenter --previous --tail=1000 > test-artifacts/karpenter-logs-previous.txt 2>/dev/null || echo "No previous Karpenter logs available" > test-artifacts/karpenter-logs-previous.txt
# Collect events with different filters
echo " Collecting events..."
kubectl get events -A --sort-by='.lastTimestamp' > test-artifacts/events.txt 2>/dev/null || echo "Failed to collect events" > test-artifacts/events.txt
kubectl get events -A --field-selector type=Warning --sort-by='.lastTimestamp' > test-artifacts/events-warnings.txt 2>/dev/null || echo "No warning events" > test-artifacts/events-warnings.txt
kubectl get events -A --field-selector type=Normal --sort-by='.lastTimestamp' | tail -50 > test-artifacts/events-normal-recent.txt 2>/dev/null || echo "No normal events" > test-artifacts/events-normal-recent.txt
# Collect resource states
echo " Collecting resource states..."
kubectl get nodes -o wide > test-artifacts/nodes.txt 2>/dev/null || echo "Failed to collect nodes" > test-artifacts/nodes.txt
kubectl get nodeclaims -o yaml > test-artifacts/nodeclaims.yaml 2>/dev/null || printf 'apiVersion: v1\nitems: []\nkind: List\n' > test-artifacts/nodeclaims.yaml
kubectl get nodepools -o yaml > test-artifacts/nodepools.yaml 2>/dev/null || printf 'apiVersion: v1\nitems: []\nkind: List\n' > test-artifacts/nodepools.yaml
kubectl get ibmnodeclasses -o yaml > test-artifacts/ibmnodeclasses.yaml 2>/dev/null || printf 'apiVersion: v1\nitems: []\nkind: List\n' > test-artifacts/ibmnodeclasses.yaml
# Collect Karpenter deployment status
echo " Collecting Karpenter deployment status..."
kubectl describe deployment -n karpenter karpenter-karpenter-ibm > test-artifacts/karpenter-deployment.txt 2>/dev/null || echo "Failed to describe Karpenter deployment" > test-artifacts/karpenter-deployment.txt
kubectl get pods -n karpenter -o wide > test-artifacts/karpenter-pods.txt 2>/dev/null || echo "Failed to get Karpenter pods" > test-artifacts/karpenter-pods.txt
# Collect any crash dumps or additional logs
echo " Collecting additional diagnostics..."
kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded > test-artifacts/problematic-pods.txt 2>/dev/null || echo "No problematic pods found" > test-artifacts/problematic-pods.txt
# Create summary of artifacts
echo " Creating artifact summary..."
{
echo "E2E Test Artifacts Summary"
echo "========================="
echo "Generated: $(date)"
echo "Test run ID: ${{ github.run_id }}"
echo ""
echo "Files collected:"
ls -la test-artifacts/ 2>/dev/null || echo "No artifacts directory"
} > test-artifacts/README.txt
echo "✅ Test artifact collection completed"
- name: Upload test artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: e2e-test-artifacts-${{ github.run_id }}
path: test-artifacts/
retention-days: 7
- name: Cleanup test resources
if: always()
env:
IBMCLOUD_API_KEY: ${{ secrets.IBMCLOUD_API_KEY }}
KUBECONFIG: /tmp/kubeconfig
run: |
echo "🧹 Starting comprehensive cleanup..."
# Clean up Kubernetes resources with extended timeouts
echo "Cleaning up Kubernetes resources..."
kubectl delete pods -l test=e2e --all-namespaces --timeout=10m || true
kubectl delete deployments -l test=e2e --all-namespaces --timeout=10m || true
kubectl delete nodeclaims -l test=e2e --timeout=10m || true
kubectl delete nodepools -l test=e2e --timeout=10m || true
kubectl delete ibmnodeclasses -l test=e2e --timeout=10m || true
# Force cleanup any stuck resources with direct patching
echo "Force cleaning up any stuck resources..."
kubectl patch nodeclaims --selector test=e2e --type='merge' -p='{"metadata":{"finalizers":[]}}' || true
kubectl patch nodepools --selector test=e2e --type='merge' -p='{"metadata":{"finalizers":[]}}' || true
kubectl patch ibmnodeclasses --selector test=e2e --type='merge' -p='{"metadata":{"finalizers":[]}}' || true
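# Removing finalizers lets stuck objects delete immediately, but any cloud resources they still own are
# then orphaned; the IBM Cloud cleanup below is what reclaims those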
# Clean up IBM Cloud instances created by e2e tests
echo "Cleaning up IBM Cloud instances..."
ibmcloud is instances --output json | \
jq -r '.[] | select(.tags | index("karpenter-e2e")) | .id' | \
xargs -I {} ibmcloud is instance-delete {} --force || true
# Clean up orphaned VNIs (Virtual Network Interfaces)
echo "Cleaning up orphaned VNIs..."
ibmcloud is virtual-network-interfaces --output json | \
jq -r '.[] | select(.name | test("e2e-.*-vni")) | .id' | \
xargs -I {} ibmcloud is virtual-network-interface-delete {} --force || true
# Clean up orphaned volumes
echo "Cleaning up orphaned volumes..."
ibmcloud is volumes --output json | \
jq -r '.[] | select(.name | test("e2e-.*-boot")) | .id' | \
xargs -I {} ibmcloud is volume-delete {} --force || true
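# The tag/name filters above ("karpenter-e2e" tag, e2e-*-vni, e2e-*-boot names) assume the e2e suite
# tags and names its resources with these conventions; adjust them if the tests change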
echo "✅ Cleanup completed"