diff --git a/.pipelines/azure_pipeline_mergedbranches.yaml b/.pipelines/azure_pipeline_mergedbranches.yaml index 291772961..a578302d3 100644 --- a/.pipelines/azure_pipeline_mergedbranches.yaml +++ b/.pipelines/azure_pipeline_mergedbranches.yaml @@ -42,7 +42,15 @@ extends: customBuildTags: - ES365AIMigrationTooling stages: + # This stage will be skipped when LinuxImageOverride and WindowsImageOverride are both set + # This feature allows bypassing the build stage when using pre-built images for testing, which saves time and resources. - stage: stage + displayName: 'Build and Publish Container Images' + condition: | + or( + eq(variables['LinuxImageOverride'], ''), + eq(variables['WindowsImageOverride'], '') + ) jobs: - job: common pool: @@ -185,7 +193,17 @@ extends: docker pull mcr.microsoft.com/azuremonitor/containerinsights/cidev/prometheus-collector/images:buildx-stable-1 docker buildx create --name dockerbuilder --driver docker-container --driver-opt image=mcr.microsoft.com/azuremonitor/containerinsights/cidev/prometheus-collector/images:buildx-stable-1 --use docker buildx inspect --bootstrap + # Determine if we should push to ACR + # Push when: NOT a PR, OR when PR is from specific branches (zane/ci-agent-auto-deploy or branches containing 'run-e2e') + SHOULD_PUSH="false" if [ "$(Build.Reason)" != "PullRequest" ]; then + SHOULD_PUSH="true" + elif [[ "$(System.PullRequest.SourceBranch)" == "zane/ci-agent-auto-deploy" ]] || [[ "$(System.PullRequest.SourceBranch)" == *"run-e2e"* ]]; then + SHOULD_PUSH="true" + echo "PR from branch $(System.PullRequest.SourceBranch) - will push image to ACR for E2E testing" + fi + + if [ "$SHOULD_PUSH" == "true" ]; then docker buildx build --platform $(BUILD_PLATFORMS) --tag ${{ variables.repoImageName }}:$(linuxImagetag) -f kubernetes/linux/Dockerfile.multiarch --metadata-file $(Build.ArtifactStagingDirectory)/linux/metadata.json --build-arg IMAGE_TAG=$(linuxTelemetryTag) --build-arg GOLANG_BASE_IMAGE=$(GOLANG_BASE_IMAGE) --build-arg CI_BASE_IMAGE=$(CI_BASE_IMAGE) --push --provenance=false . 
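# Note: with buildx, --push publishes the multi-arch manifest directly from the
# builder; the pull below re-fetches it from the registry as an availability check.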
echo "##vso[task.logissue type=warning]Linux image built with tag: ${{ variables.repoImageName }}:$(linuxImagetag)" docker pull ${{ variables.repoImageName }}:$(linuxImagetag) @@ -543,7 +561,16 @@ extends: inputs: targetType: 'inline' script: | + # Push when: NOT a PR, OR when PR is from specific branches (zane/ci-agent-auto-deploy or branches containing 'run-e2e') + $shouldPush = $false if ("$(Build.Reason)" -ne "PullRequest") { + $shouldPush = $true + } elseif ("$(System.PullRequest.SourceBranch)" -eq "zane/ci-agent-auto-deploy" -or "$(System.PullRequest.SourceBranch)" -like "*run-e2e*") { + $shouldPush = $true + Write-Host "PR from branch $(System.PullRequest.SourceBranch) - will push image to ACR for E2E testing" + } + + if ($shouldPush) { docker push ${{ variables.repoImageName }}:$(windowsImageTag)-$(windows2019BaseImageVersion) } - task: CodeQL3000Finalize@0 @@ -751,7 +778,16 @@ extends: inputs: targetType: 'inline' script: | + # Push when: NOT a PR, OR when PR is from specific branches (zane/ci-agent-auto-deploy or branches containing 'run-e2e') + $shouldPush = $false if ("$(Build.Reason)" -ne "PullRequest") { + $shouldPush = $true + } elseif ("$(System.PullRequest.SourceBranch)" -eq "zane/ci-agent-auto-deploy" -or "$(System.PullRequest.SourceBranch)" -like "*run-e2e*") { + $shouldPush = $true + Write-Host "PR from branch $(System.PullRequest.SourceBranch) - will push image to ACR for E2E testing" + } + + if ($shouldPush) { docker push ${{ variables.repoImageName }}:$(windowsImageTag)-$(windows2022BaseImageVersion) } - task: CodeQL3000Finalize@0 @@ -792,7 +828,16 @@ extends: az account set -s ${{ variables.subscription }} az acr login -n ${{ variables.containerRegistry }} @{"image.name"="${{ variables.repoImageName }}:$(windowsImageTag)"} | ConvertTo-Json -Compress | Out-File -Encoding ascii $(Build.ArtifactStagingDirectory)/windows/metadata.json + # Push when: NOT a PR, OR when PR is from specific branches (zane/ci-agent-auto-deploy or branches containing 'run-e2e') + $shouldPush = $false if ("$(Build.Reason)" -ne "PullRequest") { + $shouldPush = $true + } elseif ("$(System.PullRequest.SourceBranch)" -eq "zane/ci-agent-auto-deploy" -or "$(System.PullRequest.SourceBranch)" -like "*run-e2e*") { + $shouldPush = $true + Write-Host "PR from branch $(System.PullRequest.SourceBranch) - will push multi-arch image to ACR for E2E testing" + } + + if ($shouldPush) { docker manifest create ${{ variables.repoImageName }}:$(windowsImageTag) ${{ variables.repoImageName }}:$(windowsImageTag)-$(windows2019BaseImageVersion) ${{ variables.repoImageName }}:$(windowsImageTag)-$(windows2022BaseImageVersion) docker manifest push ${{ variables.repoImageName }}:$(windowsImageTag) Write-Host "##vso[task.logissue type=warning]Windows image built with tag: ${{ variables.repoImageName }}:$(windowsImageTag)" @@ -880,4 +925,70 @@ extends: ScanType: CustomScan FileDirPath: '$(Build.ArtifactStagingDirectory)' DisableRemediation: false - AcceptableOutdatedSignatureInHours: 72 \ No newline at end of file + AcceptableOutdatedSignatureInHours: 72 + + - stage: Deploy_and_Test_Images_In_Dev_Clusters + displayName: Deploy and Test Images in Dev Clusters + lockBehavior: sequential + dependsOn: + - stage + # Deploy runs when Build succeeds OR when Build is skipped with valid overrides + # This stage runs when: + # 1. Direct push to ci_prod or zane/ci-agent-auto-deploy or branches containing 'run-e2e' + # 2. 
PR from zane/ci-agent-auto-deploy branch OR PR from branch containing 'run-e2e' + condition: | + and( + or( + eq(variables['Build.SourceBranch'], 'refs/heads/ci_prod'), + eq(variables['Build.SourceBranch'], 'refs/heads/zane/ci-agent-auto-deploy'), + contains(variables['Build.SourceBranch'], 'run-e2e'), + and( + eq(variables['Build.Reason'], 'PullRequest'), + or( + eq(variables['System.PullRequest.SourceBranch'], 'zane/ci-agent-auto-deploy'), + contains(variables['System.PullRequest.SourceBranch'], 'run-e2e') + ) + ) + ), + or( + eq(dependencies.stage.result, 'Succeeded'), + and( + eq(dependencies.stage.result, 'Skipped'), + ne(variables['LinuxImageOverride'], ''), + ne(variables['WindowsImageOverride'], '') + ) + ) + ) + variables: + # Use images built from previous build stage by default + # To override: Set pipeline variables 'LinuxImageOverride' and 'WindowsImageOverride' when queuing + linuxImageTagUnderTest: $[coalesce(variables['LinuxImageOverride'], stageDependencies.stage.common.outputs['setup.linuxImagetag'])] + windowsImageTagUnderTest: $[coalesce(variables['WindowsImageOverride'], stageDependencies.stage.common.outputs['setup.windowsImageTag'])] + jobs: + # TODO: gradually add more clusters from test automation framework when the tests are stable + # TODO: TeamsWebhookUri to be added + # Cluster 1: zane-test Cluster + - template: /.pipelines/e2e-test/azure-template-deploy-and-test-ci-image-in-aks-cluster.yml@self + parameters: + clusterName: 'zane-test' + resourceGroup: 'zane-test' + azureSubscription: 'ContainerInsights_Build_Subscription_CI' + environmentName: 'CI-Agent-Dev' + linuxImageTag: $(linuxImageTagUnderTest) + windowsImageTag: $(windowsImageTagUnderTest) + azureClientId: $(AksZaneTestClientId) + azureTenantId: $(AzureZaneTestTenantId) + teamsWebhookUri: $(TeamsWebhookUri) + + # Cluster 2: zane-test2 Cluster + - template: /.pipelines/e2e-test/azure-template-deploy-and-test-ci-image-in-aks-cluster.yml@self + parameters: + clusterName: 'zane-test2' + resourceGroup: 'zane-test' + azureSubscription: 'ContainerInsights_Build_Subscription_CI' + environmentName: 'CI-Agent-Dev2' + linuxImageTag: $(linuxImageTagUnderTest) + windowsImageTag: $(windowsImageTagUnderTest) + azureClientId: $(AksZaneTest2ClientId) + azureTenantId: $(AzureZaneTestTenantId) + teamsWebhookUri: $(TeamsWebhookUri) diff --git a/.pipelines/e2e-test/azure-template-deploy-and-test-ci-image-in-aks-cluster.yml b/.pipelines/e2e-test/azure-template-deploy-and-test-ci-image-in-aks-cluster.yml new file mode 100644 index 000000000..f25bdc793 --- /dev/null +++ b/.pipelines/e2e-test/azure-template-deploy-and-test-ci-image-in-aks-cluster.yml @@ -0,0 +1,306 @@ +parameters: +- name: clusterName + type: string +- name: resourceGroup + type: string +- name: azureSubscription + type: string + default: 'ContainerInsights_Build_Subscription_CI' +- name: environmentName + type: string +- name: linuxImageTag + type: string +- name: windowsImageTag + type: string +- name: azureClientId + type: string +- name: azureTenantId + type: string +- name: teamsWebhookUri + type: string + default: '$(TeamsWebhookUri)' +- name: additionalTestParams + type: string + default: '' + +jobs: +- deployment: Deploy_${{ replace(parameters.clusterName, '-', '_') }} + displayName: 'Deploy & Test: ${{ parameters.clusterName }}' + environment: ${{ parameters.environmentName }} + pool: + name: Azure-Pipelines-CI-Test-EO + image: ci-1es-managed-ubuntu-2204 + os: linux + variables: + skipComponentGovernanceDetection: true + strategy: + runOnce: + deploy: 
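+      # runOnce/deploy executes these steps once per pipeline run against the
+      # environment resource declared above, so deployments are tracked per cluster.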
+      steps:
+      # Log deployment start
+      - bash: |
+          set -euo pipefail
+
+          echo "========================================="
+          echo "CLUSTER DEPLOYMENT STARTING"
+          echo "========================================="
+          echo "Cluster: ${{ parameters.clusterName }}"
+          echo "Environment: ${{ parameters.environmentName }}"
+          echo "Build ID: $(Build.BuildId)"
+          echo "Pipeline Run: $(Build.BuildNumber)"
+          echo ""
+          echo "✓ Sequential deployment locking enabled at stage level"
+          echo "✓ Multiple pipeline runs will execute sequentially"
+          echo "========================================="
+        displayName: 'Deployment Start'
+
+      - checkout: self
+        persistCredentials: true
+
+      - script: |
+          set -euo pipefail
+          echo "Ensuring kubectl & helm are installed"
+          if ! command -v kubectl >/dev/null 2>&1; then
+            echo "Installing kubectl"
+            sudo az aks install-cli
+          else
+            echo "kubectl already installed: $(kubectl version --client --short || true)"
+          fi
+          if ! command -v helm >/dev/null 2>&1; then
+            echo "Installing Helm 3"
+            curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+          else
+            echo "Helm already installed: $(helm version --short || true)"
+          fi
+        displayName: 'Install kubectl and Helm'
+
+      - task: AzureCLI@2
+        displayName: 'Get credentials for ${{ parameters.clusterName }}'
+        inputs:
+          azureSubscription: ${{ parameters.azureSubscription }}
+          scriptLocation: 'inlineScript'
+          scriptType: 'bash'
+          inlineScript: 'az aks get-credentials -g ${{ parameters.resourceGroup }} -n ${{ parameters.clusterName }}'
+
+      # Determine MCR repository paths based on image tags.
+      - task: Bash@3
+        name: DetermineMcrRepo
+        displayName: 'Determine MCR Repository Paths'
+        env:
+          LINUX_IMAGE_TAG: ${{ parameters.linuxImageTag }}
+          WINDOWS_IMAGE_TAG: ${{ parameters.windowsImageTag }}
+        inputs:
+          targetType: 'inline'
+          script: |
+            # Function to determine registry path based on image tag
+            # CI dev builds contain git hash pattern (e.g., -gbdc2f3f42-20250701203056)
+            # Production releases are simple versions (e.g., 3.1.32)
+            get_mcr_repo() {
+              local image_tag="$1"
+              if [[ "$image_tag" =~ -g[a-f0-9]+-[0-9]+ ]]; then
+                echo "mcr.microsoft.com/azuremonitor/containerinsights/cidev"
+              else
+                echo "mcr.microsoft.com/azuremonitor/containerinsights/ciprod"
+              fi
+            }
+
+            LINUX_MCR_REPO=$(get_mcr_repo "$LINUX_IMAGE_TAG")
+            WINDOWS_MCR_REPO=$(get_mcr_repo "$WINDOWS_IMAGE_TAG")
+
+            echo "Repository Path Detection:"
+            echo "  Linux image tag: $LINUX_IMAGE_TAG"
+            echo "  → Linux MCR repo: $LINUX_MCR_REPO"
+            echo "  Windows image tag: $WINDOWS_IMAGE_TAG"
+            echo "  → Windows MCR repo: $WINDOWS_MCR_REPO"
+
+            # Export for subsequent steps
+            echo "##vso[task.setvariable variable=linuxMcrRepo;isOutput=true]$LINUX_MCR_REPO"
+            echo "##vso[task.setvariable variable=windowsMcrRepo;isOutput=true]$WINDOWS_MCR_REPO"
+
+      # TODO: consider using a Helm chart when it is ready for AKS deployment
+      - task: Bash@3
+        displayName: 'Patch ama-logs pods with new images'
+        env:
+          LINUX_IMAGE_TAG: ${{ parameters.linuxImageTag }}
+          WINDOWS_IMAGE_TAG: ${{ parameters.windowsImageTag }}
+          LINUX_MCR_REPO: $(DetermineMcrRepo.linuxMcrRepo)
+          WINDOWS_MCR_REPO: $(DetermineMcrRepo.windowsMcrRepo)
+        inputs:
+          targetType: 'inline'
+          script: |
+            echo "Deploying to cluster: ${{ parameters.clusterName }}"
+            echo "  Linux image: $LINUX_MCR_REPO:$LINUX_IMAGE_TAG"
+            echo "  Windows image: $WINDOWS_MCR_REPO:$WINDOWS_IMAGE_TAG"
+            echo ""
+            echo "Finding and patching ama-logs pods in kube-system namespace..."
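+            # Note: containers[].image is one of the few pod-spec fields that can be
+            # patched in place, so kubelet restarts only the patched container with the
+            # new image. The owning DaemonSet/Deployment template is not modified, so a
+            # pod recreated by its controller reverts to the previously configured image.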
+ + kubectl get pods -n kube-system --no-headers | grep ama-logs | awk '{print $1}' | while read pod_name; do + echo "Processing pod: $pod_name" + + if [[ "$pod_name" =~ ^ama-logs-windows ]]; then + IMG_URL="$WINDOWS_MCR_REPO:$WINDOWS_IMAGE_TAG" + container_name="ama-logs-windows" + elif [[ "$pod_name" =~ ^ama-logs-rs ]] || [[ "$pod_name" =~ ^ama-logs-[a-z0-9]{5}$ ]]; then + IMG_URL="$LINUX_MCR_REPO:$LINUX_IMAGE_TAG" + container_name="ama-logs" + else + echo " ⚠ Unknown pod pattern: $pod_name - skipping" + continue + fi + + echo " → Patching with image: $IMG_URL (container: $container_name)" + + kubectl patch pod "$pod_name" -n kube-system \ + --patch "{\"spec\": {\"containers\": [{\"name\": \"$container_name\", \"image\": \"$IMG_URL\"}]}}" \ + && echo " ✓ Successfully patched $pod_name" \ + || echo " ✗ Failed to patch $pod_name" + done + + echo "" + echo "Pod patching complete!" + echo "Current UTC time: $(date -u +"%Y-%m-%dT%H:%M:%SZ")" + echo "Current ama-logs pods:" + kubectl get pods -n kube-system | grep ama-logs + + # Wait for Kubernetes API to update and pods to begin restarting + - task: Bash@3 + displayName: 'Wait for pod patch to propagate (5 minutes)' + inputs: + targetType: 'inline' + script: | + echo "========================================" + echo "Waiting for Pod Patch Propagation" + echo "========================================" + echo "" + echo "Waiting 5 minutes" + echo "" + + wait_time=300 + interval=30 + elapsed=0 + + while [ $elapsed -lt $wait_time ]; do + remaining=$((wait_time - elapsed)) + echo "⏳ Waiting... ($elapsed/$wait_time seconds elapsed, $remaining seconds remaining)" + sleep $interval + elapsed=$((elapsed + interval)) + done + + echo "" + echo "✓ Wait complete! Now checking actual pod readiness status..." + echo "========================================" + + # Pre-test verification: Wait for pods to be ready with new images + - task: Bash@3 + displayName: 'Pre-Test: Wait for pods to be ready with new images' + env: + LINUX_IMAGE_TAG: ${{ parameters.linuxImageTag }} + WINDOWS_IMAGE_TAG: ${{ parameters.windowsImageTag }} + LINUX_MCR_REPO: $(DetermineMcrRepo.linuxMcrRepo) + WINDOWS_MCR_REPO: $(DetermineMcrRepo.windowsMcrRepo) + inputs: + targetType: 'inline' + script: | + chmod +x $(Build.SourcesDirectory)/.pipelines/e2e-test/verify-ci-images-before-test.sh + $(Build.SourcesDirectory)/.pipelines/e2e-test/verify-ci-images-before-test.sh "$LINUX_IMAGE_TAG" "$WINDOWS_IMAGE_TAG" "$LINUX_MCR_REPO" "$WINDOWS_MCR_REPO" + + # Capture container start times for Log Analytics query filtering + - task: Bash@3 + name: CaptureStartTime + displayName: 'Capture container start times for Log Analytics filtering' + env: + LINUX_IMAGE_TAG: ${{ parameters.linuxImageTag }} + WINDOWS_IMAGE_TAG: ${{ parameters.windowsImageTag }} + LINUX_MCR_REPO: $(DetermineMcrRepo.linuxMcrRepo) + WINDOWS_MCR_REPO: $(DetermineMcrRepo.windowsMcrRepo) + inputs: + targetType: 'inline' + script: | + chmod +x $(Build.SourcesDirectory)/.pipelines/e2e-test/capture-container-start-time.sh + $(Build.SourcesDirectory)/.pipelines/e2e-test/capture-container-start-time.sh "$LINUX_IMAGE_TAG" "$WINDOWS_IMAGE_TAG" "$LINUX_MCR_REPO" "$WINDOWS_MCR_REPO" + + # Export container start time for use in tests + if [ -f /tmp/container-deployment-time.env ]; then + source /tmp/container-deployment-time.env + echo "Container start time captured: $CONTAINER_START_TIME" + echo "##vso[task.setvariable variable=CONTAINER_START_TIME;isOutput=true]$CONTAINER_START_TIME" + else + echo "ERROR: Container start time not found at 
/tmp/container-deployment-time.env"
+              echo "This is required for Log Analytics query filtering"
+              exit 1
+            fi
+
+      - task: Bash@3
+        displayName: 'Wait for logs to be ingested into Log Analytics (20 min)'
+        inputs:
+          targetType: 'inline'
+          script: |
+            echo "========================================"
+            echo "Waiting for Log Analytics Ingestion"
+            echo "========================================"
+            echo "Cluster: ${{ parameters.clusterName }}"
+            echo "Container start time: $(CaptureStartTime.CONTAINER_START_TIME)"
+            echo ""
+            echo "Waiting 20 minutes to allow logs to be ingested..."
+            echo "This ensures queries will find logs from the newly deployed containers."
+            echo ""
+
+            wait_time=1200  # 20 minutes
+            interval=60
+            elapsed=0
+
+            while [ $elapsed -lt $wait_time ]; do
+              remaining=$((wait_time - elapsed))
+              minutes_elapsed=$((elapsed / 60))
+              minutes_remaining=$((remaining / 60))
+              echo "⏳ Waiting... ($minutes_elapsed/$((wait_time / 60)) minutes elapsed, $minutes_remaining minutes remaining)"
+              sleep $interval
+              elapsed=$((elapsed + interval))
+            done
+
+            echo ""
+            echo "✓ Wait complete! Logs should now be available in Log Analytics."
+            echo "✓ Tests will query logs with filter: TimeGenerated > datetime('$(CaptureStartTime.CONTAINER_START_TIME)')"
+            echo "========================================"
+      # TODO (improvement): the container start time captured above is exported but not yet consumed by the tests. Consider passing it to the test script for use in log queries.
+      - bash: |
+          # Pass container start time to tests
+          export CONTAINER_START_TIME="$(CaptureStartTime.CONTAINER_START_TIME)"
+          echo "Running tests for cluster: ${{ parameters.clusterName }}"
+          echo "Container start time: $CONTAINER_START_TIME"
+
+          chmod +x ./install-and-execute-testkube-tests.sh
+          ./install-and-execute-testkube-tests.sh \
+            AzureClientId=${{ parameters.azureClientId }} \
+            AzureTenantId=${{ parameters.azureTenantId }} \
+            TeamsWebhookUri=${{ parameters.teamsWebhookUri }} \
+            ${{ parameters.additionalTestParams }}
+        workingDirectory: $(Build.SourcesDirectory)/test/testkube/
+        displayName: 'Install Testkube and run E2E tests'
+
+      # Post-test verification: Check pods are still healthy after test execution
+      - task: Bash@3
+        displayName: 'Post-Test: Verify pods remained stable after tests'
+        condition: always()
+        env:
+          LINUX_IMAGE_TAG: ${{ parameters.linuxImageTag }}
+          WINDOWS_IMAGE_TAG: ${{ parameters.windowsImageTag }}
+          LINUX_MCR_REPO: $(DetermineMcrRepo.linuxMcrRepo)
+          WINDOWS_MCR_REPO: $(DetermineMcrRepo.windowsMcrRepo)
+        inputs:
+          targetType: 'inline'
+          script: |
+            chmod +x $(Build.SourcesDirectory)/.pipelines/e2e-test/verify-ci-images-after-test.sh
+            $(Build.SourcesDirectory)/.pipelines/e2e-test/verify-ci-images-after-test.sh "$LINUX_IMAGE_TAG" "$WINDOWS_IMAGE_TAG" "$LINUX_MCR_REPO" "$WINDOWS_MCR_REPO"
+
+      # Log deployment completion
+      - bash: |
+          echo "========================================="
+          echo "DEPLOYMENT COMPLETE"
+          echo "========================================="
+          echo "Cluster: ${{ parameters.clusterName }}"
+          echo "Build ID: $(Build.BuildId)"
+          echo "✓ Deployment finished for: ${{ parameters.clusterName }}"
+          echo "========================================="
+        displayName: 'Deployment Completion'
+        condition: always()
diff --git a/.pipelines/e2e-test/capture-container-start-time.sh b/.pipelines/e2e-test/capture-container-start-time.sh
new file mode 100644
index 000000000..b3c9b4322
--- /dev/null
+++ b/.pipelines/e2e-test/capture-container-start-time.sh
@@ -0,0
+1,102 @@
+#!/bin/bash
+# Capture Container Start Times
+# Captures the LATEST container start time across all ama-logs pods
+# This is used to filter Log Analytics queries to only show logs from the newly deployed containers
+
+set -e
+
+# Parse command line arguments
+LINUX_IMAGE_TAG="${1}"
+WINDOWS_IMAGE_TAG="${2}"
+LINUX_MCR_REPO="${3}"
+WINDOWS_MCR_REPO="${4}"
+
+if [ -z "$LINUX_IMAGE_TAG" ] || [ -z "$WINDOWS_IMAGE_TAG" ] || [ -z "$LINUX_MCR_REPO" ] || [ -z "$WINDOWS_MCR_REPO" ]; then
+  echo "Error: Missing required parameters"
+  echo "Usage: $0 <linux_image_tag> <windows_image_tag> <linux_mcr_repo> <windows_mcr_repo>"
+  exit 1
+fi
+
+LINUX_IMAGE="$LINUX_MCR_REPO:$LINUX_IMAGE_TAG"
+WINDOWS_IMAGE="$WINDOWS_MCR_REPO:$WINDOWS_IMAGE_TAG"
+
+# Source shared functions
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/util.sh"
+
+echo "================================"
+echo "Container Start Time Capture"
+echo "================================"
+echo "Capturing LATEST container start time for Log Analytics queries..."
+echo ""
+
+
+# Build pod configurations using shared function
+declare -a pod_configs
+build_pod_configs "$LINUX_IMAGE" "$WINDOWS_IMAGE"
+
+if [ ${#pod_configs[@]} -eq 0 ]; then
+  echo "✗ ERROR: No pods found!"
+  exit 1
+fi
+
+latest_start_time=""
+
+for config in "${pod_configs[@]}"; do
+  IFS='|' read -r pod_name expected_image container_name <<< "$config"
+
+  # Get container start time for the specific container
+  start_time=$(kubectl get pod "$pod_name" -n kube-system \
+    -o jsonpath="{.status.containerStatuses[?(@.name=='$container_name')].state.running.startedAt}" 2>/dev/null || echo "")
+
+  if [ -n "$start_time" ]; then
+    echo "  Pod $pod_name (container: $container_name) started at: $start_time"
+
+    # Track LATEST time (lexicographically later in ISO 8601 format)
+    if [ -z "$latest_start_time" ] || [[ "$start_time" > "$latest_start_time" ]]; then
+      latest_start_time="$start_time"
+    fi
+  else
+    echo "✗ ERROR: Could not determine container start time for pod $pod_name (container: $container_name)"
+    echo "This is required for Log Analytics query filtering"
+    exit 1
+  fi
+done
+
+if [ -n "$latest_start_time" ]; then
+  # Validate that start time is recent (within last 30 minutes)
+  # This ensures we captured the newly deployed containers, not old ones
+  current_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+  current_epoch=$(date -u -d "$current_time" +%s 2>/dev/null || date -u -j -f "%Y-%m-%dT%H:%M:%SZ" "$current_time" +%s 2>/dev/null)
+  start_epoch=$(date -u -d "$latest_start_time" +%s 2>/dev/null || date -u -j -f "%Y-%m-%dT%H:%M:%SZ" "$latest_start_time" +%s 2>/dev/null)
+  time_diff=$((current_epoch - start_epoch))
+  time_diff_minutes=$((time_diff / 60))
+
+  echo ""
+  echo "Time validation:"
+  echo "  Current UTC time: $current_time"
+  echo "  Latest start time: $latest_start_time"
+  echo "  Time difference: $time_diff_minutes minutes ago"
+
+  if [ $time_diff_minutes -gt 30 ]; then
+    echo ""
+    echo "⚠ WARNING: Container start time is $time_diff_minutes minutes old!"
+    echo "This suggests the containers may not have been restarted with the new images."
+    echo "Expected: Within ~2-5 minutes (time for pods to restart after patching)"
+    echo "Consider investigating if the image patch actually triggered pod restarts."
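+    # Execution continues past this warning: the timestamp is still exported below,
+    # and downstream queries simply get a wider window, e.g. (illustrative KQL):
+    #   ContainerLogV2 | where TimeGenerated > datetime('<latest_start_time>')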
+  else
+    echo "  ✓ Start time is recent (within expected range)"
+  fi
+
+  # Export for use in tests
+  echo "CONTAINER_START_TIME=$latest_start_time" > /tmp/container-deployment-time.env
+  echo ""
+  echo "✓ LATEST container start time: $latest_start_time"
+  echo "✓ Saved to /tmp/container-deployment-time.env"
+  echo "✓ Log Analytics queries should filter: TimeGenerated > datetime('$latest_start_time')"
+  echo ""
+  exit 0
+else
+  echo "✗ ERROR: Could not determine container start times"
+  exit 1
+fi
diff --git a/.pipelines/e2e-test/util.sh b/.pipelines/e2e-test/util.sh
new file mode 100644
index 000000000..b157451c5
--- /dev/null
+++ b/.pipelines/e2e-test/util.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Shared functions for pod verification scripts
+# This file should be sourced by pre-test and post-test verification scripts
+
+# Function to build pod configurations
+# Parameters:
+#   $1 - LINUX_IMAGE (full image path with tag)
+#   $2 - WINDOWS_IMAGE (full image path with tag)
+# Returns:
+#   pod_configs array populated with "pod_name|expected_image|container_name"
+build_pod_configs() {
+  local LINUX_IMAGE="$1"
+  local WINDOWS_IMAGE="$2"
+
+  echo "Getting list of ama-logs pods..."
+  local pod_list=$(kubectl get pods -n kube-system --no-headers | grep ama-logs | awk '{print $1}')
+
+  # Clear the global pod_configs array
+  pod_configs=()
+
+  for pod_name in $pod_list; do
+    local expected_image
+    local container_name
+
+    # Determine expected image and container name based on pod type
+    if [[ "$pod_name" =~ ^ama-logs-windows ]]; then
+      expected_image="$WINDOWS_IMAGE"
+      container_name="ama-logs-windows"
+    elif [[ "$pod_name" =~ ^ama-logs-rs ]] || [[ "$pod_name" =~ ^ama-logs-[a-z0-9]{5}$ ]]; then
+      expected_image="$LINUX_IMAGE"
+      container_name="ama-logs"
+    else
+      echo "✗ ERROR: Unknown pod pattern: $pod_name"
+      echo "Expected pod names to match one of:"
+      echo "  - ama-logs-windows-* (Windows pods)"
+      echo "  - ama-logs-rs-* (Linux ReplicaSet pods)"
+      echo "  - ama-logs-xxxxx (Linux DaemonSet pods, 5 alphanumeric chars)"
+      exit 1
+    fi
+
+    pod_configs+=("$pod_name|$expected_image|$container_name")
+  done
+
+  echo "Found ${#pod_configs[@]} pods to verify"
+  echo ""
+}
diff --git a/.pipelines/e2e-test/verify-ci-images-after-test.sh b/.pipelines/e2e-test/verify-ci-images-after-test.sh
new file mode 100644
index 000000000..d3e5cf04a
--- /dev/null
+++ b/.pipelines/e2e-test/verify-ci-images-after-test.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# Post-Test Pod Verification
+# Performs a quick health check to ensure pods maintained correct images and are still healthy
+# This script is used AFTER running E2E tests to detect any pod restarts or issues during testing
+
+set -e
+
+# Parse command line arguments
+LINUX_IMAGE_TAG="${1}"
+WINDOWS_IMAGE_TAG="${2}"
+LINUX_MCR_REPO="${3}"
+WINDOWS_MCR_REPO="${4}"
+
+if [ -z "$LINUX_IMAGE_TAG" ] || [ -z "$WINDOWS_IMAGE_TAG" ] || [ -z "$LINUX_MCR_REPO" ] || [ -z "$WINDOWS_MCR_REPO" ]; then
+  echo "Error: Missing required parameters"
+  echo "Usage: $0 <linux_image_tag> <windows_image_tag> <linux_mcr_repo> <windows_mcr_repo>"
+  exit 1
+fi
+
+LINUX_IMAGE="$LINUX_MCR_REPO:$LINUX_IMAGE_TAG"
+WINDOWS_IMAGE="$WINDOWS_MCR_REPO:$WINDOWS_IMAGE_TAG"
+
+# Source shared functions
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/util.sh"
+
+echo "================================"
+echo "Post-Test Pod Verification"
+echo "================================"
+echo "Verifying pods maintained correct images and are still healthy..."
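+# Unlike the pre-test script, this is a single-pass check with no retries: any pod
+# that restarted or changed image while the tests ran is reported immediately.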
+echo "" +echo "Repository Configuration:" +echo " Linux MCR repo: $LINUX_MCR_REPO" +echo " Windows MCR repo: $WINDOWS_MCR_REPO" +echo "" +echo "Expected Images:" +echo " Linux image: $LINUX_IMAGE" +echo " Windows image: $WINDOWS_IMAGE" +echo "" + +# Build pod configurations using shared function +declare -a pod_configs +build_pod_configs "$LINUX_IMAGE" "$WINDOWS_IMAGE" + +# Perform instant health check on all pods +echo "Performing instant health check on all pods..." +echo "" + +declare -a issues +for config in "${pod_configs[@]}"; do + IFS='|' read -r pod_name expected_image container_name <<< "$config" + + # Get pod details + current_image=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.spec.containers[?(@.name=='$container_name')].image}" 2>/dev/null || echo "ERROR") + pod_status=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.status.phase}" 2>/dev/null || echo "Unknown") + container_ready=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.status.containerStatuses[?(@.name=='$container_name')].ready}" 2>/dev/null || echo "false") + + echo "Check pod: $pod_name" + echo " Container: $container_name" + echo " Expected image: $expected_image" + echo " Current image: $current_image" + echo " Pod status: $pod_status" + echo " Container ready: $container_ready" + + # Check for any issues + has_issue=false + + if [[ "$current_image" != "$expected_image" ]]; then + echo " ✗ IMAGE MISMATCH!" + issues+=("$pod_name: expected image '$expected_image' but found '$current_image'") + has_issue=true + fi + + if [[ "$pod_status" != "Running" ]]; then + echo " ✗ POD NOT RUNNING!" + issues+=("$pod_name: pod status is '$pod_status' (expected 'Running')") + has_issue=true + fi + + if [[ "$container_ready" != "true" ]]; then + echo " ✗ CONTAINER NOT READY!" + issues+=("$pod_name: container '$container_name' is not ready") + has_issue=true + fi + + if [[ "$has_issue" = false ]]; then + echo " ✓ Pod: $pod_name passed checks" + fi + echo "" +done + +# Report results +echo "================================" +echo "Post-Test Verification Summary" +echo "================================" + +if [ ${#issues[@]} -eq 0 ]; then + echo "✓ SUCCESS: All pods maintained the correct images and are healthy!" + echo "" + echo "Final pod status:" + kubectl get pods -n kube-system | grep ama-logs + exit 0 +else + echo "✗ FAILURE: Some pods have issues after test execution!" + echo "" + echo "Issues detected:" + printf ' - %s\n' "${issues[@]}" + echo "" + echo "This indicates the pods may have been restarted or updated during testing." + echo "This could cause test instability or false results." 
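+  # Exiting non-zero fails this step (which runs with condition: always()), so
+  # instability during the test window is surfaced even if the tests passed.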
+  echo ""
+  echo "Current pod status:"
+  kubectl get pods -n kube-system | grep ama-logs
+  echo ""
+  echo "Detailed pod information:"
+  for issue in "${issues[@]}"; do
+    pod=$(echo "$issue" | cut -d: -f1)
+    echo ""
+    echo "--- Details for $pod ---"
+    kubectl describe pod "$pod" -n kube-system | grep -A 20 "Events:" || kubectl describe pod "$pod" -n kube-system | tail -30
+  done
+  exit 1
+fi
diff --git a/.pipelines/e2e-test/verify-ci-images-before-test.sh b/.pipelines/e2e-test/verify-ci-images-before-test.sh
new file mode 100644
index 000000000..d0f4a4f25
--- /dev/null
+++ b/.pipelines/e2e-test/verify-ci-images-before-test.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# Pre-Test Pod Verification
+# Waits for all ama-logs pods to be running with the correct images and ready
+# This script is used BEFORE running E2E tests to ensure the new agent version is deployed
+
+set -e
+
+# Parse command line arguments
+LINUX_IMAGE_TAG="${1}"
+WINDOWS_IMAGE_TAG="${2}"
+LINUX_MCR_REPO="${3}"
+WINDOWS_MCR_REPO="${4}"
+
+if [ -z "$LINUX_IMAGE_TAG" ] || [ -z "$WINDOWS_IMAGE_TAG" ] || [ -z "$LINUX_MCR_REPO" ] || [ -z "$WINDOWS_MCR_REPO" ]; then
+  echo "Error: Missing required parameters"
+  echo "Usage: $0 <linux_image_tag> <windows_image_tag> <linux_mcr_repo> <windows_mcr_repo>"
+  exit 1
+fi
+
+LINUX_IMAGE="$LINUX_MCR_REPO:$LINUX_IMAGE_TAG"
+WINDOWS_IMAGE="$WINDOWS_MCR_REPO:$WINDOWS_IMAGE_TAG"
+
+# Configuration
+MAX_RETRIES=15
+CHECK_INTERVAL=60 # seconds
+MAX_WAIT_MINUTES=$((MAX_RETRIES * CHECK_INTERVAL / 60))
+
+# Source shared functions
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/util.sh"
+
+echo "================================"
+echo "Pre-Test Pod Verification"
+echo "================================"
+echo "Waiting for pods to be running with new images and ready..."
+echo ""
+echo "Repository Configuration:"
+echo "  Linux MCR repo: $LINUX_MCR_REPO"
+echo "  Windows MCR repo: $WINDOWS_MCR_REPO"
+echo ""
+echo "Expected Images:"
+echo "  Linux image: $LINUX_IMAGE"
+echo "  Windows image: $WINDOWS_IMAGE"
+echo ""
+
+# Build pod configurations using shared function
+declare -a pod_configs
+build_pod_configs "$LINUX_IMAGE" "$WINDOWS_IMAGE"
+
+# Validate array was populated
+if [ ${#pod_configs[@]} -eq 0 ]; then
+  echo "✗ ERROR: No pods found to verify!"
+  echo "This likely means no ama-logs pods exist in the kube-system namespace."
+  exit 1
+fi
+
+# Wait for all pods to be ready
+echo "================================"
+echo "Waiting for all pods to be ready"
+echo "================================"
+echo "Total pods to check: ${#pod_configs[@]}"
+echo "Maximum retries: $MAX_RETRIES"
+echo "Check interval: ${CHECK_INTERVAL}s"
+echo "Maximum wait time: $MAX_WAIT_MINUTES minutes"
+echo ""
+
+# Track ready status for each pod
+declare -A pod_ready_status
+for config in "${pod_configs[@]}"; do
+  pod_name=$(echo "$config" | cut -d'|' -f1)
+  pod_ready_status["$pod_name"]=false
+done
+
+attempt=1
+while [ $attempt -le $MAX_RETRIES ]; do
+  has_not_ready_pod=false
+  ready_count=0
+  total_count=${#pod_configs[@]}
+
+  # Check each pod
+  for config in "${pod_configs[@]}"; do
+    IFS='|' read -r pod_name expected_image container_name <<< "$config"
+    echo ""
+    echo ""
+    echo "  Start checking pod: $pod_name"
+    echo "  Container: $container_name"
+    echo "  Expected image: $expected_image"
+
+    # Skip if already marked as ready
+    if [ "${pod_ready_status[$pod_name]}" = "true" ]; then
+      echo "  Finished checking pod: $pod_name"
+      echo "  Pod: $pod_name has expected image ready. Skipping check."
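+      # pod_ready_status caches each pass, so only pods still pending are re-queried
+      # on later attempts; already-ready pods just count toward the total below.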
+      echo "  ✓ $pod_name - Ready"
+      ready_count=$((ready_count + 1))
+      continue
+    fi
+
+    # Get pod details
+    current_image=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.spec.containers[?(@.name=='$container_name')].image}" 2>/dev/null || echo "")
+    pod_status=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.status.phase}" 2>/dev/null || echo "Unknown")
+    container_ready=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.status.containerStatuses[?(@.name=='$container_name')].ready}" 2>/dev/null || echo "false")
+
+    # Check if pod is ready
+    if [[ "$current_image" == "$expected_image" ]] && [[ "$pod_status" == "Running" ]] && [[ "$container_ready" == "true" ]]; then
+      pod_ready_status["$pod_name"]=true
+      ready_count=$((ready_count + 1))
+      echo "  Finished checking pod: $pod_name"
+      echo "    Image: $current_image"
+      echo "    Expected image: $expected_image"
+      echo "    Status: $pod_status"
+      echo "    Container ready: $container_ready"
+      echo "    ✓ $pod_name - Ready"
+    else
+      has_not_ready_pod=true
+      echo "  Finished checking pod: $pod_name"
+      echo "    ⏳ $pod_name - Waiting (Status: $pod_status, Container ready: $container_ready)"
+      if [[ "$current_image" != "$expected_image" ]]; then
+        echo "    Image mismatch: expected $expected_image, got $current_image"
+      fi
+      echo "    ✗ $pod_name - NOT Ready"
+    fi
+  done
+
+  # Show progress summary
+  elapsed_seconds=$(((attempt - 1) * CHECK_INTERVAL))
+  minutes_elapsed=$((elapsed_seconds / 60))
+  seconds_elapsed=$((elapsed_seconds % 60))
+  remaining_retries=$((MAX_RETRIES - attempt))
+  remaining_seconds=$((remaining_retries * CHECK_INTERVAL))
+  minutes_remaining=$((remaining_seconds / 60))
+  seconds_remaining=$((remaining_seconds % 60))
+
+  echo ""
+  echo "Attempt $attempt/$MAX_RETRIES (${minutes_elapsed}m${seconds_elapsed}s elapsed, ${minutes_remaining}m${seconds_remaining}s remaining)"
+  echo "Progress: $ready_count/$total_count pods ready"
+  echo ""
+
+  # Exit early if all pods are ready
+  if [ "$has_not_ready_pod" = false ]; then
+    echo "================================"
+    echo "✓ SUCCESS: All pods are ready!"
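+    # Early exit: no need to consume the remaining retries once every pod is ready.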
+ echo "================================" + echo "Total attempts: $attempt" + echo "Total wait time: ${minutes_elapsed}m${seconds_elapsed}s" + echo "" + echo "Final pod status:" + kubectl get pods -n kube-system | grep ama-logs + exit 0 + fi + + # Sleep before next retry (except after last attempt) + if [ $attempt -lt $MAX_RETRIES ]; then + sleep $CHECK_INTERVAL + fi + + ((attempt++)) +done + +# Max retries reached - report failed pods +echo "================================" +echo "✗ TIMEOUT: Not all pods became ready after $MAX_RETRIES attempts" +echo "================================" +echo "" +echo "Failed pods:" +for config in "${pod_configs[@]}"; do + IFS='|' read -r pod_name expected_image container_name <<< "$config" + if [ "${pod_ready_status[$pod_name]}" != "true" ]; then + current_image=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.spec.containers[?(@.name=='$container_name')].image}" 2>/dev/null || echo "ERROR") + pod_status=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.status.phase}" 2>/dev/null || echo "Unknown") + container_ready=$(kubectl get pod "$pod_name" -n kube-system -o jsonpath="{.status.containerStatuses[?(@.name=='$container_name')].ready}" 2>/dev/null || echo "false") + + echo " ✗ $pod_name" + echo " Expected image: $expected_image" + echo " Current image: $current_image" + echo " Pod status: $pod_status" + echo " Container ready: $container_ready" + fi +done +echo "" +echo "Final pod status:" +kubectl get pods -n kube-system | grep ama-logs +exit 1 diff --git a/kubernetes/linux/setup.sh b/kubernetes/linux/setup.sh index ed037d598..31883f858 100644 --- a/kubernetes/linux/setup.sh +++ b/kubernetes/linux/setup.sh @@ -78,6 +78,12 @@ echo "$(fluent-bit --version)" >> packages_version.txt # install fluentd fluentd_version="1.16.3" + +# Pre-install cool.io to avoid ARM64 build issues (segfault during native extension compilation) +# if [ "$ARCH" == "arm64" ]; then +# gem install cool.io -v "1.9.0" --no-document +# fi + gem install fluentd -v $fluentd_version --no-document # remove the test directory from fluentd
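+# Note: the disabled cool.io block above assumes an $ARCH variable (for example,
+# derived from "uname -m" or a Docker TARGETARCH build arg and normalized to
+# "arm64") is set earlier in this script; confirm that before re-enabling it.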