diff --git a/tests/e2e/backup_restore_cli_suite_test.go b/tests/e2e/backup_restore_cli_suite_test.go index 10677dda808..b87079fa0b3 100644 --- a/tests/e2e/backup_restore_cli_suite_test.go +++ b/tests/e2e/backup_restore_cli_suite_test.go @@ -1,6 +1,7 @@ package e2e_test import ( + "context" "fmt" "log" "strings" @@ -8,6 +9,7 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/openshift/oadp-operator/tests/e2e/lib" ) @@ -173,6 +175,21 @@ func runApplicationBackupAndRestoreViaCLI(brCase ApplicationBackupRestoreCase, u // run restore via CLI runRestoreViaCLI(brCase.BackupRestoreCase, backupName, restoreName, nsRequiredResticDCWorkaround) + // For file-system backup restores (KOPIA/restic), the restored pods may have + // broken networking because OVN-Kubernetes doesn't fully wire the network + // namespace for pods recreated by Velero with a restore-wait init container. + // Deleting the pods lets the deployment controller create fresh ones with + // proper networking while preserving the restored PVC data. 
+ if brCase.BackupRestoreType == lib.KOPIA { + log.Printf("Restarting pods in namespace %s to ensure proper networking after file-system restore", brCase.Namespace) + err = kubernetesClientForSuiteRun.CoreV1().Pods(brCase.Namespace).DeleteCollection( + context.Background(), + metav1.DeleteOptions{}, + metav1.ListOptions{LabelSelector: "e2e-app=true"}, + ) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + } + // Run optional custom verification if brCase.PostRestoreVerify != nil { log.Printf("Running post-restore custom function for case %s", brCase.Name) diff --git a/tests/e2e/backup_restore_suite_test.go b/tests/e2e/backup_restore_suite_test.go index fc91f733c62..09090da4fbf 100644 --- a/tests/e2e/backup_restore_suite_test.go +++ b/tests/e2e/backup_restore_suite_test.go @@ -266,6 +266,21 @@ func runApplicationBackupAndRestore(brCase ApplicationBackupRestoreCase, updateL // run restore runRestore(brCase.BackupRestoreCase, backupName, restoreName, nsRequiredResticDCWorkaround) + // For file-system backup restores (KOPIA/restic), the restored pods may have + // broken networking because OVN-Kubernetes doesn't fully wire the network + // namespace for pods recreated by Velero with a restore-wait init container. + // Deleting the pods lets the deployment controller create fresh ones with + // proper networking while preserving the restored PVC data. 
+ if brCase.BackupRestoreType == lib.KOPIA { + log.Printf("Restarting pods in namespace %s to ensure proper networking after file-system restore", brCase.Namespace) + err = kubernetesClientForSuiteRun.CoreV1().Pods(brCase.Namespace).DeleteCollection( + context.Background(), + metav1.DeleteOptions{}, + metav1.ListOptions{LabelSelector: "e2e-app=true"}, + ) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + } + // Run optional custom verification if brCase.PostRestoreVerify != nil { log.Printf("Running post-restore custom function for case %s", brCase.Name) diff --git a/tests/e2e/lib/apps.go b/tests/e2e/lib/apps.go index ebabfe67c7c..82e3b1198e9 100755 --- a/tests/e2e/lib/apps.go +++ b/tests/e2e/lib/apps.go @@ -421,7 +421,9 @@ func RunMustGather(artifact_dir string, clusterClient client.Client) error { } // VerifyBackupRestoreData verifies if app ready before backup and after restore to compare data. -func VerifyBackupRestoreData(ocClient client.Client, kubeClient *kubernetes.Clientset, kubeConfig *rest.Config, artifactDir string, namespace string, routeName string, serviceName string, app string, prebackupState bool, twoVol bool) error { +// skipReadyz, despite its name, skips the post-restore /healthz gate (use for VM-based tests where the +// app route is not directly reachable from the test harness). 
+func VerifyBackupRestoreData(ocClient client.Client, kubeClient *kubernetes.Clientset, kubeConfig *rest.Config, artifactDir string, namespace string, routeName string, serviceName string, app string, prebackupState bool, twoVol bool, skipReadyz ...bool) error { log.Printf("Verifying backup/restore data of %s", app) appEndpointURL, proxyPodParams, err := getAppEndpointURLAndProxyParams(ocClient, kubeClient, kubeConfig, namespace, serviceName, routeName) log.Printf("App endpoint URL: %s", appEndpointURL) @@ -490,19 +492,107 @@ func VerifyBackupRestoreData(ocClient client.Client, kubeClient *kubernetes.Clie return err } } else { - //restore check + // --- Restore verification --- + // After a Velero restore, verify that the application is serving the same data + // that was captured before the backup (stored in backup-data.txt). + // + // Flow for todo apps (mysql-persistent / mongo-persistent): + // 1. If !shouldSkipReadyz: poll /healthz to confirm the app is alive. + // 2. Fetch /todo-incomplete to get the current data for comparison. + // - For VM-based tests (shouldSkipReadyz=true) the route is not directly + // reachable until the VM finishes cloud-init, so we poll with retries. + // - For container-based tests a single request suffices after healthz passes. + // Flow for parks-app: single GET /clicks. + // Finally, compare the fetched data against backup-data.txt. + + shouldSkipReadyz := len(skipReadyz) > 0 && skipReadyz[0] + isTodoApp := namespace == "mysql-persistent" || namespace == "mongo-persistent" + + // Step 1: healthz gate (container-based todo apps only). + // Polls /healthz to confirm the app is alive and the HTTP server is responding. + // The todo2-go app exposes /healthz (used by the startup probes) and /readyz (returns + // 503 until DB is connected). We use /healthz here because it matches the startup-probe + // configuration in the app manifests and becomes available immediately on startup. 
+ // Skipped for VM tests where the app runs inside a Fedora/CentOS VM and the + // OpenShift route proxies to a different service topology. + if isTodoApp && !shouldSkipReadyz { + // MakeRequest can return err == nil for HTTP 5xx when using the proxy (curl), + // so we validate the response body and errResp via isHealthzAlive. + requestParams := getRequestParameters(appEndpointURL+"/healthz", proxyPodParams, GET, nil) + const maxHealthzAttempts = 5 + for attempt := 1; attempt <= maxHealthzAttempts; attempt++ { + log.Printf("healthz check attempt %d/%d: GET %s/healthz\n", attempt, maxHealthzAttempts, appEndpointURL) + respData, errResp, err = MakeRequest(*requestParams) + if err == nil && isHealthzAlive(respData, errResp) { + log.Printf("healthz endpoint is alive (attempt %d/%d): %s\n", attempt, maxHealthzAttempts, respData) + break + } + if err != nil { + if errResp != "" { + log.Printf("Request response error msg: %s\n", errResp) + } + } else { + log.Printf("healthz attempt %d/%d: response not healthy (body=%q, errResp=%q)\n", attempt, maxHealthzAttempts, respData, errResp) + } + if attempt == maxHealthzAttempts { + log.Printf("healthz endpoint did not become alive after %d attempts: %v\n", maxHealthzAttempts, err) + if err != nil { + return err + } + return fmt.Errorf("healthz did not return healthy response after %d attempts (last body=%q, errResp=%q)", maxHealthzAttempts, respData, errResp) + } + backoff := time.Duration(attempt) * 5 * time.Second + log.Printf("healthz attempt %d/%d failed, retrying in %s: %v\n", attempt, maxHealthzAttempts, backoff, err) + time.Sleep(backoff) + } + } - if namespace == "mysql-persistent" || namespace == "mongo-persistent" { - // Make request to the "todo-incomplete" endpoint + // Step 2: fetch /todo-incomplete data for todo apps. + // In the VM (shouldSkipReadyz) case we skipped the healthz gate above, so the + // app may not be ready yet. Poll with retries and increasing backoff. 
+ // In the container case healthz already passed, so one attempt is enough. + if isTodoApp { requestParamsTodoIncomplete := getRequestParameters(appEndpointURL+"/todo-incomplete", proxyPodParams, GET, nil) - respData, errResp, err = MakeRequest(*requestParamsTodoIncomplete) - if err != nil { - if errResp != "" { - log.Printf("Request response error msg: %s\n", errResp) + maxTodoAttempts := 1 + todoBackoffSec := 0 + if shouldSkipReadyz { + maxTodoAttempts = 10 + todoBackoffSec = 10 + } + for attempt := 1; attempt <= maxTodoAttempts; attempt++ { + if maxTodoAttempts > 1 { + log.Printf("Polling app endpoint attempt %d/%d: GET %s/todo-incomplete", attempt, maxTodoAttempts, appEndpointURL) } - return err + respData, errResp, err = MakeRequest(*requestParamsTodoIncomplete) + success := err == nil && (maxTodoAttempts == 1 || len(bytes.TrimSpace([]byte(respData))) > 0) + if success { + if maxTodoAttempts > 1 { + log.Printf("VIRT App endpoint responded with data (attempt %d/%d): %s", attempt, maxTodoAttempts, respData) + } + break + } + if attempt == maxTodoAttempts { + if err != nil { + if errResp != "" { + log.Printf("Request response error msg: %s\n", errResp) + } + return err + } + if maxTodoAttempts > 1 { + log.Printf("VIRT App endpoint returned empty data after %d attempts", maxTodoAttempts) + return errors.New("VIRT App endpoint returned empty data after max attempts") + } + if errResp != "" { + log.Printf("Request response error msg: %s\n", errResp) + } + return err + } + backoff := time.Duration(attempt) * time.Duration(todoBackoffSec) * time.Second + log.Printf("VIRT Attempt %d/%d: no data yet, retrying in %s (err=%v, resp=%q)", attempt, maxTodoAttempts, backoff, err, respData) + time.Sleep(backoff) } } + if namespace == "parks-app" { // Make request to the "clicks" endpoint responseParams := getRequestParameters(appEndpointURL+"/clicks", proxyPodParams, GET, nil) @@ -539,6 +629,26 @@ func VerifyBackupRestoreData(ocClient client.Client, kubeClient 
*kubernetes.Clie return nil } +// errRespIndicatesHTTPError returns true when errResp contains HTTP error indicators (e.g. 5xx from MakeRequest). +func errRespIndicatesHTTPError(errResp string) bool { + if errResp == "" { + return false + } + return strings.Contains(errResp, "HTTP request failed") || + strings.Contains(errResp, "status code") || + strings.Contains(errResp, "500") || + strings.Contains(errResp, "502") || + strings.Contains(errResp, "503") +} + +// isHealthzAlive returns true when the /healthz response indicates the app is +// alive: the response body is non-empty (any content is fine) and errResp +// does not contain HTTP error indicators. +func isHealthzAlive(respData, errResp string) bool { + return strings.TrimSpace(respData) != "" && + !errRespIndicatesHTTPError(errResp) +} + func getRequestParameters(url string, proxyPodParams *ProxyPodParameters, method HTTPMethod, payload *string) *RequestParameters { return &RequestParameters{ ProxyPodParams: proxyPodParams, diff --git a/tests/e2e/sample-applications/mongo-persistent/mongo-persistent-block.yaml b/tests/e2e/sample-applications/mongo-persistent/mongo-persistent-block.yaml index 8ba00128be9..c22bd0a5c59 100644 --- a/tests/e2e/sample-applications/mongo-persistent/mongo-persistent-block.yaml +++ b/tests/e2e/sample-applications/mongo-persistent/mongo-persistent-block.yaml @@ -29,7 +29,12 @@ items: supplementalGroups: type: RunAsAny volumes: - - '*' + - persistentVolumeClaim + - secret + - configMap + - downwardAPI + - projected + - emptyDir users: - system:admin - system:serviceaccount:mongo-persistent:mongo-persistent-sa @@ -78,18 +83,21 @@ items: securityContext: runAsUser: 0 # Format the block device on first use so MongoDB can use the filesystem + # Use same app image; install e2fsprogs then format (mongo:7 base has apt) initContainers: - - image: quay.io/migtools/oadp-ci-todolist-mongo-go-testing:latest + - image: quay.io/migtools/oadp-ci-todo2-go-testing-mongodb:latest imagePullPolicy: 
IfNotPresent securityContext: privileged: true name: setup-block-device command: - - "sh" + - "bash" - "-c" - | - DEVICE="/dev/xvdx" - MOUNT_POINT="/data/db" + set -e + apt-get update -qq && apt-get install -y -qq e2fsprogs + DEVICE="/dev/block-pv" + MOUNT_POINT="/var/lib/mongodb" if [ ! -e $DEVICE ]; then echo "$DEVICE does not exist." exit 1 @@ -108,18 +116,16 @@ items: umount $MOUNT_POINT volumeDevices: - name: block-volume-pv - devicePath: /dev/xvdx + devicePath: /dev/block-pv containers: - name: todolist - image: quay.io/migtools/oadp-ci-todolist-mongo-go-testing:latest + image: quay.io/migtools/oadp-ci-todo2-go-testing-mongodb:latest securityContext: privileged: true env: - - name: MONGO_INITDB_ROOT_USERNAME - value: changeme - - name: MONGO_INITDB_ROOT_PASSWORD - value: changeme - - name: MONGO_INITDB_DATABASE + - name: DB_BACKEND + value: mongodb + - name: MONGO_DATABASE value: todolist ports: - containerPort: 8000 @@ -127,19 +133,19 @@ items: resources: limits: memory: 512Mi - # Block mode: mount the block device and use it for /data/db + # Block mode: mount the block device and use it for /var/lib/mongodb command: - "sh" - "-c" - | - DEVICE="/dev/xvdx" - MOUNT_POINT="/data/db" + DEVICE="/dev/block-pv" + MOUNT_POINT="/var/lib/mongodb" mkdir -p $MOUNT_POINT mount $DEVICE $MOUNT_POINT exec /opt/todolist/entrypoint.sh volumeDevices: - name: block-volume-pv - devicePath: /dev/xvdx + devicePath: /dev/block-pv startupProbe: httpGet: path: /healthz @@ -157,7 +163,7 @@ items: periodSeconds: 10 readinessProbe: httpGet: - path: /healthz + path: /readyz port: 8000 initialDelaySeconds: 10 periodSeconds: 5 diff --git a/tests/e2e/sample-applications/mongo-persistent/mongo-persistent-csi.yaml b/tests/e2e/sample-applications/mongo-persistent/mongo-persistent-csi.yaml index 60d0594cb64..d14c8ae3ecf 100644 --- a/tests/e2e/sample-applications/mongo-persistent/mongo-persistent-csi.yaml +++ b/tests/e2e/sample-applications/mongo-persistent/mongo-persistent-csi.yaml @@ -77,7 
+77,7 @@ items: serviceAccountName: mongo-persistent-sa containers: - name: todolist - image: quay.io/migtools/oadp-ci-todolist-mongo-go-testing:latest + image: quay.io/migtools/oadp-ci-todo2-go-testing-mongodb:latest securityContext: privileged: false allowPrivilegeEscalation: false @@ -87,11 +87,9 @@ items: seccompProfile: type: RuntimeDefault env: - - name: MONGO_INITDB_ROOT_USERNAME - value: changeme - - name: MONGO_INITDB_ROOT_PASSWORD - value: changeme - - name: MONGO_INITDB_DATABASE + - name: DB_BACKEND + value: mongodb + - name: MONGO_DATABASE value: todolist ports: - containerPort: 8000 @@ -101,7 +99,7 @@ items: memory: 512Mi volumeMounts: - name: mongo-data - mountPath: /data/db + mountPath: /var/lib/mongodb startupProbe: httpGet: path: /healthz diff --git a/tests/e2e/sample-applications/mongo-persistent/mongo-persistent.yaml b/tests/e2e/sample-applications/mongo-persistent/mongo-persistent.yaml index 0e570a17f45..455f244f333 100644 --- a/tests/e2e/sample-applications/mongo-persistent/mongo-persistent.yaml +++ b/tests/e2e/sample-applications/mongo-persistent/mongo-persistent.yaml @@ -90,13 +90,11 @@ items: serviceAccountName: mongo-persistent-sa containers: - name: todolist - image: quay.io/migtools/oadp-ci-todolist-mongo-go-testing:latest + image: quay.io/migtools/oadp-ci-todo2-go-testing-mongodb:latest env: - - name: MONGO_INITDB_ROOT_USERNAME - value: changeme - - name: MONGO_INITDB_ROOT_PASSWORD - value: changeme - - name: MONGO_INITDB_DATABASE + - name: DB_BACKEND + value: mongodb + - name: MONGO_DATABASE value: todolist ports: - containerPort: 8000 @@ -106,7 +104,7 @@ items: memory: 512Mi volumeMounts: - name: mongo-data - mountPath: /data/db + mountPath: /var/lib/mongodb startupProbe: httpGet: path: /healthz diff --git a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-csi.yaml b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-csi.yaml index d17b75e3c4e..730ba4a66a0 100644 --- 
a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-csi.yaml +++ b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-csi.yaml @@ -81,12 +81,14 @@ items: serviceAccountName: mysql-persistent-sa containers: - name: todolist - image: quay.io/migtools/oadp-ci-todolist-mariadb-go-testing:testing + image: quay.io/migtools/oadp-ci-todo2-go-testing-mariadb:latest securityContext: runAsGroup: 27 runAsUser: 27 privileged: true env: + - name: DB_BACKEND + value: mariadb - name: MYSQL_USER value: changeme - name: MYSQL_PASSWORD diff --git a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-twovol-csi.yaml b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-twovol-csi.yaml index 19b9228bea2..3adc87ecb3a 100644 --- a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-twovol-csi.yaml +++ b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent-twovol-csi.yaml @@ -77,10 +77,12 @@ items: serviceAccountName: mysql-persistent-sa containers: - name: todolist - image: quay.io/migtools/oadp-ci-todolist-mariadb-go-testing:testing + image: quay.io/migtools/oadp-ci-todo2-go-testing-mariadb:latest securityContext: privileged: false env: + - name: DB_BACKEND + value: mariadb - name: MYSQL_USER value: changeme - name: MYSQL_PASSWORD diff --git a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent.yaml b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent.yaml index 40fd294da2c..2fe71fda4ee 100644 --- a/tests/e2e/sample-applications/mysql-persistent/mysql-persistent.yaml +++ b/tests/e2e/sample-applications/mysql-persistent/mysql-persistent.yaml @@ -33,6 +33,9 @@ items: name: mysql-persistent-scc allowPrivilegeEscalation: true allowPrivilegedContainer: true + allowedCapabilities: + - CHOWN + - FOWNER runAsUser: type: RunAsAny seLinuxContext: @@ -95,7 +98,7 @@ items: serviceAccountName: mysql-persistent-sa containers: - name: todolist - image: 
quay.io/migtools/oadp-ci-todolist-mariadb-go-testing:testing + image: quay.io/migtools/oadp-ci-todo2-go-testing-mariadb:latest securityContext: runAsUser: 27 runAsGroup: 27 @@ -104,7 +107,12 @@ items: capabilities: drop: - ALL + add: + - CHOWN + - FOWNER env: + - name: DB_BACKEND + value: mariadb - name: MYSQL_USER value: changeme - name: MYSQL_PASSWORD diff --git a/tests/e2e/virt_backup_restore_suite_test.go b/tests/e2e/virt_backup_restore_suite_test.go index 778dd662490..d3cc605ae27 100644 --- a/tests/e2e/virt_backup_restore_suite_test.go +++ b/tests/e2e/virt_backup_restore_suite_test.go @@ -32,7 +32,7 @@ func vmTodoListReady(preBackupState bool, twoVol bool, database string) Verifica if err != nil { return err } - err = lib.VerifyBackupRestoreData(runTimeClientForSuiteRun, kubernetesClientForSuiteRun, kubeConfig, artifact_dir, namespace, "todolist-route", "todolist", "todolist", preBackupState, twoVol) + err = lib.VerifyBackupRestoreData(runTimeClientForSuiteRun, kubernetesClientForSuiteRun, kubeConfig, artifact_dir, namespace, "todolist-route", "todolist", "todolist", preBackupState, twoVol, true) return err }) }