diff --git a/.github/actions/setup-e2e-tools/action.yml b/.github/actions/setup-e2e-tools/action.yml new file mode 100644 index 0000000000..8bcb287237 --- /dev/null +++ b/.github/actions/setup-e2e-tools/action.yml @@ -0,0 +1,80 @@ +name: Setup E2E Tools +description: Install kubectl, helm, d8, task, yq with caching + +runs: + using: composite + steps: + - name: Cache binaries + uses: actions/cache@v4 + id: cache + with: + path: ~/.local/bin + key: e2e-tools-${{ runner.os }}-v3 + + - name: Ensure tools installed (install if missing or cache empty) + shell: bash + run: | + set -euo pipefail + mkdir -p ~/.local/bin + + need_install=false + for bin in helm yq task; do + if ! command -v "$bin" >/dev/null 2>&1; then + echo "missing: $bin"; need_install=true + fi + done + # Always (re)install kubectl into ~/.local/bin to shadow system kubectl + need_install=true + if [ "${need_install}" != "true" ] && [ "${{ steps.cache.outputs.cache-hit }}" = "true" ]; then + echo "All tools present from cache, skipping install." + exit 0 + fi + + # Install system dependencies + sudo apt-get update + sudo apt-get install -y jq apache2-utils curl bash ca-certificates + + # Install kubectl (always to ~/.local/bin) + KUBECTL_VERSION=$(curl -Ls https://dl.k8s.io/release/stable.txt) + curl -fsSL -o kubectl "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl" + chmod +x kubectl && mv kubectl ~/.local/bin/kubectl + + # Install helm + curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + if [ -x /usr/local/bin/helm ]; then mv /usr/local/bin/helm ~/.local/bin/; fi + + # Install d8 + curl -fsSL -o d8-install.sh https://raw.githubusercontent.com/deckhouse/deckhouse-cli/main/d8-install.sh + bash d8-install.sh || true + if [ -x /usr/local/bin/d8 ]; then cp -f /usr/local/bin/d8 ~/.local/bin/d8; fi + + # Install yq + curl -L -o ~/.local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 + chmod +x ~/.local/bin/yq + + # Install task + sh -c "$(curl -fsSL https://taskfile.dev/install.sh)" -- -d -b ~/.local/bin + + # Cleanup + rm -f d8-install.sh + + - name: Add to PATH + shell: bash + run: | + echo "$HOME/.local/bin" >> $GITHUB_PATH + echo "/usr/local/bin" >> $GITHUB_PATH + + - name: Verify installation + shell: bash + run: | + echo "โœ… Installed tools:" + which kubectl || true + kubectl version --client=true --output=yaml || kubectl version || true + which helm || true + helm version --short || true + which d8 || true + d8 version || true + which yq || true + yq --version || true + which task || true + task --version || true diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index d6ae02c147..64abe14721 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -12,23 +12,913 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: E2E Matrix Tests (bootstrap) +name: E2E Matrix Tests (DVP-over-DVP) on: + push: + branches: + - feat/ci-e2e-matrix pull_request: types: [opened, reopened, synchronize, labeled, unlabeled] branches: - main - feat/ci-e2e-matrix + schedule: + - cron: "30 2 * * *" workflow_dispatch: + inputs: + profiles: + description: "Storage profiles (comma-separated): sds, cephrbd" + required: false + default: "sds,cephrbd" + timeout: + description: "Ginkgo timeout (e.g. 
2h, 4h)" + required: false + default: "4h" permissions: contents: read +env: + E2E_K8S_URL: https://api.e2e.virtlab.flant.com + jobs: - noop: - name: Bootstrap + # ============================================ + # 1. SETUP - Environment preparation + # ============================================ + setup: + name: Setup Environment + runs-on: ubuntu-latest + outputs: + profiles: ${{ steps.load.outputs.profiles }} + steps: + - uses: actions/checkout@v4 + + - name: Load storage profiles + id: load + run: | + # Load profiles dynamically from profiles.json + cd ci/dvp-e2e + PROFILES=$(jq -r '[.[].name] | @json' profiles.json) + echo "profiles=$PROFILES" >> "$GITHUB_OUTPUT" + + - name: Print matrix + run: | + echo "Will test profiles: ${{ steps.load.outputs.profiles }}" + + # ============================================ + # 2. E2E - Parallel test execution + # ============================================ + prepare: + name: Prepare Cluster (${{ matrix.profile }}) + needs: [setup] runs-on: ubuntu-latest + timeout-minutes: 300 + concurrency: + group: prepare-${{ github.ref }}-${{ matrix.profile }} + cancel-in-progress: true + strategy: + fail-fast: false + matrix: + profile: ${{ fromJson(needs.setup.outputs.profiles) }} + + env: + GO_VERSION: "1.24.6" + TMP_ROOT: ${{ github.workspace }}/ci/dvp-e2e/tmp + LOOP_WEBHOOK: ${{ secrets.LOOP_WEBHOOK_URL || secrets.LOOP_WEBHOOK }} + LOOP_CHANNEL: ${{ secrets.LOOP_CHANNEL || 'test-virtualization-loop-alerts' }} # TODO: replace with channel secret after successful run + + outputs: + run_id: ${{ steps.prep.outputs.run_id }} + storage_class: ${{ steps.profile-config.outputs.storage_class }} + image_storage_class: ${{ steps.profile-config.outputs.image_storage_class }} + snapshot_storage_class: ${{ steps.profile-config.outputs.snapshot_storage_class }} + attach_disk_size: ${{ steps.profile-config.outputs.attach_disk_size }} + steps: - - name: Say hello - run: echo "Bootstrap workflow OK" + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + + + - name: Install Task + uses: arduino/setup-task@v2 + with: + version: 3.x + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Install Helm + uses: azure/setup-helm@v4.3.0 + with: + version: v3.17.2 + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + with: + version: 'latest' + + - name: Install Deckhouse CLI + env: + D8_VERSION: v0.13.2 + run: | + set -euo pipefail + echo "Installing d8 ${D8_VERSION}..." + curl -fsSL -o d8.tgz "https://deckhouse.io/downloads/deckhouse-cli/${D8_VERSION}/d8-${D8_VERSION}-linux-amd64.tar.gz" + tar -xzf d8.tgz linux-amd64/bin/d8 + mv linux-amd64/bin/d8 /usr/local/bin/d8 + chmod +x /usr/local/bin/d8 + rm -rf d8.tgz linux-amd64 + d8 --version + + - name: Install yq + run: | + echo "Installing yq..." 
+ curl -L -o /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 + chmod +x /usr/local/bin/yq + + - name: Prepare environment + id: prep + run: | + RUN_ID="nightly-nested-e2e-${{ matrix.profile }}-$(date +%H%M)" + echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT" + echo "RUN_ID=$RUN_ID" >> "$GITHUB_ENV" + echo "PROFILE=${{ matrix.profile }}" >> "$GITHUB_ENV" + echo "TMP_ROOT=${{ env.TMP_ROOT }}" >> "$GITHUB_ENV" + mkdir -p "${{ env.TMP_ROOT }}/shared" "${{ env.TMP_ROOT }}/matrix-logs" + + - name: Build parent kubeconfig from secret + shell: bash + run: | + set -euo pipefail + mkdir -p "$HOME/.kube" + cat > "$HOME/.kube/config" <> "$GITHUB_ENV" + + - name: Prepare run values.yaml + working-directory: ci/dvp-e2e + run: | + task run:values:prepare \ + RUN_ID="${{ env.RUN_ID }}" \ + RUN_NAMESPACE="${{ env.RUN_ID }}" \ + RUN_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" + echo "VALUES_TEMPLATE_FILE=${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" >> $GITHUB_ENV + + - name: Prepare Deckhouse registry auth + run: | + set -euo pipefail + prod_user='${{ secrets.PROD_READ_REGISTRY_USER }}' + prod_pass='${{ secrets.PROD_READ_REGISTRY_PASSWORD }}' + dev_user='${{ secrets.BOOTSTRAP_DEV_REGISTRY_LOGIN }}' + dev_pass='${{ secrets.BOOTSTRAP_DEV_REGISTRY_PASSWORD }}' + if [ -z "$prod_user" ] || [ -z "$prod_pass" ]; then + echo "[ERR] PROD_READ_REGISTRY_* secrets are not configured" >&2 + exit 1 + fi + if [ -z "$dev_user" ] || [ -z "$dev_pass" ]; then + echo "[ERR] BOOTSTRAP_DEV_REGISTRY_* secrets are not configured" >&2 + exit 1 + fi + echo "::add-mask::$prod_user" + echo "::add-mask::$prod_pass" + echo "::add-mask::$dev_user" + echo "::add-mask::$dev_pass" + prod_auth_b64=$(printf '%s:%s' "$prod_user" "$prod_pass" | base64 | tr -d '\n') + dev_auth_b64=$(printf '%s:%s' "$dev_user" "$dev_pass" | base64 | tr -d '\n') + docker_cfg=$(printf '{"auths":{"registry.deckhouse.io":{"auth":"%s"},"dev-registry.deckhouse.io":{"auth":"%s"}}}' "$prod_auth_b64" "$dev_auth_b64") + docker_cfg_b64=$(printf '%s' "$docker_cfg" | base64 | tr -d '\n') + echo "::add-mask::$docker_cfg_b64" + { + echo "REGISTRY_DOCKER_CFG=$docker_cfg_b64" + echo "DECKHOUSE_REGISTRY_USER=$prod_user" + echo "DECKHOUSE_REGISTRY_PASSWORD=$prod_pass" + } >> "$GITHUB_ENV" + + - name: Configure registry auth (REGISTRY_DOCKER_CFG) + working-directory: ci/dvp-e2e + run: | + yq eval --inplace '.deckhouse.registryDockerCfg = strenv(REGISTRY_DOCKER_CFG)' "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" + + - name: Docker login to Deckhouse registry + uses: docker/login-action@v3 + with: + registry: registry.deckhouse.io + username: ${{ env.DECKHOUSE_REGISTRY_USER }} + password: ${{ env.DECKHOUSE_REGISTRY_PASSWORD }} + + - name: Docker login to Deckhouse dev registry + uses: docker/login-action@v3 + with: + registry: ${{ vars.DEV_REGISTRY }} + username: ${{ secrets.BOOTSTRAP_DEV_REGISTRY_LOGIN }} + password: ${{ secrets.BOOTSTRAP_DEV_REGISTRY_PASSWORD }} + + - name: Configure storage profile + working-directory: ci/dvp-e2e + id: profile-config + env: + PROFILE: ${{ matrix.profile }} + run: | + # Get storage class configuration from profiles.json + PROFILE_CONFIG=$(./scripts/get_profile_config.sh "${PROFILE}") + + # Parse the output more carefully + STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^STORAGE_CLASS=" | cut -d'=' -f2) + IMAGE_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^IMAGE_STORAGE_CLASS=" | cut -d'=' -f2) + SNAPSHOT_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^SNAPSHOT_STORAGE_CLASS=" | cut 
-d'=' -f2) + ATTACH_DISK_SIZE=$(echo "$PROFILE_CONFIG" | grep "^ATTACH_DISK_SIZE=" | cut -d'=' -f2) + + echo "Profile: ${PROFILE}" + echo "Storage Class: ${STORAGE_CLASS}" + echo "Image Storage Class: ${IMAGE_STORAGE_CLASS}" + echo "Snapshot Storage Class: ${SNAPSHOT_STORAGE_CLASS}" + echo "Attach Disk Size: ${ATTACH_DISK_SIZE}" + + # Export variables safely + echo "STORAGE_CLASS=${STORAGE_CLASS}" >> $GITHUB_ENV + echo "IMAGE_STORAGE_CLASS=${IMAGE_STORAGE_CLASS}" >> $GITHUB_ENV + echo "SNAPSHOT_STORAGE_CLASS=${SNAPSHOT_STORAGE_CLASS}" >> $GITHUB_ENV + echo "ATTACH_DISK_SIZE=${ATTACH_DISK_SIZE}" >> $GITHUB_ENV + echo "storage_class=$STORAGE_CLASS" >> $GITHUB_OUTPUT + echo "image_storage_class=$IMAGE_STORAGE_CLASS" >> $GITHUB_OUTPUT + echo "snapshot_storage_class=$SNAPSHOT_STORAGE_CLASS" >> $GITHUB_OUTPUT + echo "attach_disk_size=$ATTACH_DISK_SIZE" >> $GITHUB_OUTPUT + # Pass storage profile into run values for Helm templates + PROFILE='${{ matrix.profile }}' yq eval --inplace '.storageProfile = strenv(PROFILE)' "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" + # Effective disk SC used for worker data disks (prefer image SC when set) + EFF_DISK_SC=${IMAGE_STORAGE_CLASS:-$STORAGE_CLASS} + echo "EFFECTIVE_DISK_SC=${EFF_DISK_SC}" >> $GITHUB_ENV + + - name: Install infra (namespace/RBAC/jump-host) + working-directory: ci/dvp-e2e + run: | + USE_GH_SSH_KEYS=true SSH_FILE_NAME=id_ed task render-infra \ + TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ + VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ + PARENT_KUBECONFIG="${KUBECONFIG}" \ + SSH_FILE_NAME="id_ed" + USE_GH_SSH_KEYS=true SSH_FILE_NAME=id_ed task infra-deploy \ + TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ + VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ + PARENT_KUBECONFIG="${KUBECONFIG}" \ + SSH_FILE_NAME="id_ed" + + - name: Bootstrap nested cluster + working-directory: ci/dvp-e2e + run: | + echo "๐Ÿš€ dhctl bootstrap (profile: ${{ matrix.profile }})" + task dhctl-bootstrap \ + TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ + VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ + PARENT_KUBECONFIG="${KUBECONFIG}" \ + SSH_FILE_NAME="id_ed" \ + TARGET_STORAGE_CLASS="ceph-pool-r2-csi-rbd-immediate" + + - name: Attach data disks to worker VMs using hotplug + working-directory: ci/dvp-e2e + run: | + task infra:attach-storage-disks-hotplug \ + TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ + VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ + PARENT_KUBECONFIG="${KUBECONFIG}" \ + DISK_SIZE="${ATTACH_DISK_SIZE:-10Gi}" \ + STORAGE_CLASS="ceph-pool-r2-csi-rbd-immediate" \ + DISK_COUNT="2" + + - name: Build nested kubeconfig + working-directory: ci/dvp-e2e + run: | + task nested:kubeconfig \ + TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ + VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ + NAMESPACE="${{ env.RUN_ID }}" \ + SSH_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/ssh" \ + SSH_FILE_NAME="id_ed" \ + NESTED_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested" \ + NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" \ + PARENT_KUBECONFIG="${KUBECONFIG}" + + + - name: Configure storage backend (post-bootstrap) + run: | + set -euo pipefail + NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" + PROFILE="${{ matrix.profile }}" + + echo "[INFO] Waiting for nested API Server readiness..." 
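+          # Poll the nested API server for up to ~5 minutes (30 attempts x 10s) before applying any storage configuration.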
+ for i in $(seq 1 30); do + if KUBECONFIG="$NESTED_KUBECONFIG" kubectl get nodes >/dev/null 2>&1; then + echo "[INFO] API Server is ready"; break + fi + echo "[INFO] Waiting for API Server... $i/30"; sleep 10 + done + + + + # Configure deckhouse-prod ModuleSource for profiles that need external modules (cephrbd, sds) + if [ "$PROFILE" = "cephrbd" ] || [ "$PROFILE" = "sds" ]; then + echo "[Deckhouse] Configuring ModuleSource 'deckhouse-prod' for module images..." + MODSRC_REPO="${{ vars.DECKHOUSE_PROD_MODULES_REPO || 'registry.deckhouse.io/deckhouse/ee/modules' }}" + CFG_B64="${{ env.REGISTRY_DOCKER_CFG || '' }}" + if [ -z "$CFG_B64" ]; then + echo "[ERR] REGISTRY_DOCKER_CFG is empty; ensure PROD_READ_REGISTRY_* secrets are configured." >&2 + exit 1 + else + success=0 + for attempt in $(seq 1 5); do + if REPO="$MODSRC_REPO" CFG="$CFG_B64" yq eval -n ' + .apiVersion = "deckhouse.io/v1alpha1" | + .kind = "ModuleSource" | + .metadata.name = "deckhouse-prod" | + .spec.registry.repo = env(REPO) | + .spec.registry.scheme = "HTTPS" | + .spec.registry.dockerCfg = env(CFG) | + .spec.releaseChannel = "EarlyAccess" + ' | KUBECONFIG="$NESTED_KUBECONFIG" kubectl apply --validate=false -f -; then + success=1 + break + fi + echo "[Deckhouse] ModuleSource apply failed (attempt $attempt); retry in 10s..." + sleep 10 + done + if [ "$success" -ne 1 ]; then + echo "[ERR] Failed to apply deckhouse-prod ModuleSource after retries" >&2 + exit 1 + fi + echo "[Deckhouse] ModuleSource 'deckhouse-prod' applied." + KUBECONFIG="$NESTED_KUBECONFIG" kubectl get modulesources.deckhouse.io || true + fi + fi + + # If Ceph profile requested, enable Ceph modules and apply CephCluster + echo "[Ceph] ModuleSource is configured; deferring Ceph setup to Taskfile..." + if false && [ "$PROFILE" = "cephrbd" ]; then + echo "[Ceph] Enabling operator-ceph and CSI-Ceph via ModuleConfig..." + for i in {1..12}; do + if KUBECONFIG="$NESTED_KUBECONFIG" kubectl --request-timeout=10s apply --validate=false -f ci/dvp-e2e/manifests/storage/operator-ceph.yaml; then + break; else echo "[Ceph] operator-ceph apply failed, retry $i/12"; sleep 10; fi + done + echo "[Ceph] Waiting for rook-ceph-operator deployment..." + for i in $(seq 1 50); do + if KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph get deploy rook-ceph-operator >/dev/null 2>&1; then + KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph rollout status deploy/rook-ceph-operator --timeout=180s && break + fi + echo "[Ceph] Waiting for rook-ceph-operator (attempt $i/60)"; sleep 10 + done + echo "[Ceph] Waiting for CephCluster CRD..." + KUBECONFIG="$NESTED_KUBECONFIG" kubectl wait --for=condition=Established --timeout=300s crd/cephclusters.ceph.rook.io || true + echo "[Ceph] Applying CephCluster CR (first, only CephCluster)" + for i in {1..12}; do + if yq 'select(.kind == "CephCluster")' ci/dvp-e2e/manifests/storage/ceph.yaml | \ + KUBECONFIG="$NESTED_KUBECONFIG" kubectl --request-timeout=15s apply --validate=false -f -; then + break; else echo "[Ceph] CephCluster apply failed, retry $i/12"; sleep 10; fi + done + + echo "[Ceph] Waiting for CephCluster phase=Ready and HEALTH_OK..." 
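+          # Poll up to 50 times with a 20s interval (~16 minutes) until the CephCluster reports phase=Ready and health HEALTH_OK.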
+ for i in $(seq 1 50); do + PHASE=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph get cephcluster rook-ceph-cluster -o jsonpath='{.status.phase}' 2>/dev/null || true) + HEALTH=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph get cephcluster rook-ceph-cluster -o jsonpath='{.status.ceph.health}' 2>/dev/null || true) + echo "[Ceph] Status: phase=${PHASE:-?} health=${HEALTH:-?}" + if [ "$PHASE" = "Ready" ] && [ "$HEALTH" = "HEALTH_OK" ]; then + break + fi + sleep 20 + done + + echo "[Ceph] Getting cluster connection details..." + for i in $(seq 1 30); do + FSID=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph exec deploy/rook-ceph-tools -c ceph-tools -- ceph fsid 2>/dev/null | tr -d '\r\n' || true) + if [ -n "$FSID" ]; then break; fi + echo "[Ceph] Waiting for FSID (attempt $i/30)..."; sleep 5 + done + if [ -z "$FSID" ]; then + echo "[ERR] Failed to get Ceph FSID" >&2 + exit 1 + fi + echo "[Ceph] FSID: $FSID" + + USER_KEY=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph exec deploy/rook-ceph-tools -c ceph-tools -- ceph auth get-key client.admin 2>/dev/null | tr -d '\r\n' || true) + if [ -z "$USER_KEY" ]; then + echo "[ERR] Failed to get admin key" >&2 + exit 1 + fi + echo "[Ceph] Admin key obtained" + + MON_IPS=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph get svc -l ceph-mon -o jsonpath='{.items[*].spec.clusterIP}' 2>/dev/null || true) + if [ -z "$MON_IPS" ]; then + echo "[ERR] Failed to get monitor IPs" >&2 + exit 1 + fi + echo "[Ceph] Monitors: $MON_IPS" + + echo "[Ceph] Creating CephClusterConnection..." + MONITORS_YAML=$(echo "$MON_IPS" | tr ' ' '\n' | sed 's/^/ - /;s/$/:6789/') + { + echo "apiVersion: storage.deckhouse.io/v1alpha1" + echo "kind: CephClusterConnection" + echo "metadata:" + echo " name: ceph-cluster-1" + echo "spec:" + echo " clusterID: ${FSID}" + echo " monitors:" + echo "$MONITORS_YAML" + echo " userID: admin" + echo " userKey: ${USER_KEY}" + } | KUBECONFIG="$NESTED_KUBECONFIG" kubectl apply -f - + + echo "[Ceph] Waiting for CephClusterConnection to be Created..." + for i in $(seq 1 30); do + PHASE=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl get cephclusterconnection ceph-cluster-1 -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$PHASE" = "Created" ]; then break; fi + echo "[Ceph] CephClusterConnection phase=$PHASE, retry $i/30"; sleep 5 + done + + echo "[Ceph] Disabling cloud-provider-dvp module to avoid StorageClass conflict..." + yq eval -n ' + .apiVersion = "deckhouse.io/v1alpha1" | + .kind = "ModuleConfig" | + .metadata.name = "cloud-provider-dvp" | + .spec.enabled = false + ' | KUBECONFIG="$NESTED_KUBECONFIG" kubectl apply -f - + sleep 10 + + echo "[Ceph] Ensure required ServiceAccounts and imagePullSecrets (fallback)" + # Create SAs if missing + for sa in rook-ceph-cmd-reporter rook-ceph-default rook-ceph-mgr rook-ceph-osd; do + KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph create sa "$sa" --dry-run=client -o yaml | KUBECONFIG="$NESTED_KUBECONFIG" kubectl apply -f - || true + done + PULL_SECRET=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph get deploy rook-ceph-operator -o jsonpath='{.spec.template.spec.imagePullSecrets[0].name}' 2>/dev/null || true) + if [ -n "$PULL_SECRET" ]; then + echo "[Ceph] Propagating imagePullSecret '$PULL_SECRET' to SAs..." 
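+          # Copy the operator's imagePullSecret onto the Rook ServiceAccounts so their pods can pull images from the private registry.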
+ for sa in default rook-ceph-mgr rook-ceph-osd; do + KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph patch sa "$sa" -p '{"imagePullSecrets":[{"name":"'$PULL_SECRET'"}]}' || true + done + else + echo "[WARN] rook-ceph-operator has no imagePullSecrets; skipping SA patch" + fi + + echo "[Ceph] Applying CephBlockPool (StorageClass will be created by CephStorageClass)..." + yq 'select(.kind == "CephBlockPool")' ci/dvp-e2e/manifests/storage/ceph.yaml | \ + KUBECONFIG="$NESTED_KUBECONFIG" kubectl --request-timeout=15s apply --validate=false -f - + + echo "[Ceph] Waiting for CephBlockPool to be Ready..." + for i in $(seq 1 30); do + BP_PHASE=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl -n d8-operator-ceph get cephblockpool pool-rbd-auto-test -o jsonpath='{.status.phase}' 2>/dev/null || true) + [ "$BP_PHASE" = "Ready" ] && break + echo "[Ceph] BlockPool phase=$BP_PHASE; retry $i/30"; sleep 10 + done + + echo "[Ceph] Creating CephStorageClass..." + yq eval -n ' + .apiVersion = "storage.deckhouse.io/v1alpha1" | + .kind = "CephStorageClass" | + .metadata.name = "ceph-pool-r2-csi-rbd-immediate" | + .spec.clusterConnectionName = "ceph-cluster-1" | + .spec.reclaimPolicy = "Delete" | + .spec.type = "RBD" | + .spec.rbd.defaultFSType = "ext4" | + .spec.rbd.pool = "pool-rbd-auto-test" + ' | KUBECONFIG="$NESTED_KUBECONFIG" kubectl apply -f - + + echo "[Ceph] Waiting for CephStorageClass to be Created..." + for i in $(seq 1 50); do + PHASE=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl get cephstorageclass ceph-pool-r2-csi-rbd-immediate -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$PHASE" = "Created" ]; then + echo "[Ceph] CephStorageClass is Created"; break + fi + echo "[Ceph] CephStorageClass phase=$PHASE, retry $i/60"; sleep 10 + done + echo "[Ceph] Waiting for StorageClass to be created by CephStorageClass..." + for i in $(seq 1 30); do + PROV=$(KUBECONFIG="$NESTED_KUBECONFIG" kubectl get sc ceph-pool-r2-csi-rbd-immediate -o jsonpath='{.provisioner}' 2>/dev/null || true) + if [ "$PROV" = "rbd.csi.ceph.com" ]; then + echo "[Ceph] StorageClass created with correct provisioner"; break + fi + echo "[Ceph] Waiting for StorageClass, retry $i/30"; sleep 5 + done + + echo "[Ceph] Setting ceph-pool-r2-csi-rbd-immediate as default StorageClass..." 
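+          # Point defaultClusterStorageClass in the global ModuleConfig at the Ceph RBD StorageClass so PVCs without an explicit class use it.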
+ DEFAULT_STORAGE_CLASS="ceph-pool-r2-csi-rbd-immediate" + KUBECONFIG="$NESTED_KUBECONFIG" kubectl patch mc global --type='json' -p='[ + { + "op": "replace", + "path": "/spec/settings/defaultClusterStorageClass", + "value": "'$DEFAULT_STORAGE_CLASS'" + } + ]' + fi + + - name: Configure storage classes + working-directory: ci/dvp-e2e + run: | + echo "๐Ÿ’พ Configuring storage classes for profile: ${{ matrix.profile }}" + if [ -z "${{ matrix.profile }}" ]; then + echo "[ERR] matrix.profile is empty; aborting storage configuration" >&2 + exit 1 + fi + task nested:storage:configure \ + STORAGE_PROFILE="${{ matrix.profile }}" \ + TARGET_STORAGE_CLASS="${{ steps.profile-config.outputs.storage_class }}" \ + TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ + VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ + GENERATED_VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/generated-values.yaml" \ + SSH_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/ssh" \ + SSH_FILE_NAME="id_ed" \ + PASSWORD_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/password.txt" \ + PASSWORD_HASH_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/password-hash.txt" \ + NAMESPACE="${{ env.RUN_ID }}" \ + DOMAIN="" \ + DEFAULT_USER="ubuntu" \ + NESTED_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested" \ + NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" + + # Ingress smoke disabled: not required for storage config + + # Ceph CSI smoke check removed per request + + - name: Upload run context + if: always() + uses: actions/upload-artifact@v4 + with: + name: run-context-${{ env.RUN_ID }} + path: | + ci/dvp-e2e/tmp/runs/${{ env.RUN_ID }} + ci/dvp-e2e/tmp/shared + if-no-files-found: warn + + run-e2e: + name: E2E Tests (${{ matrix.profile }}) + needs: [setup, prepare] + runs-on: ubuntu-latest + timeout-minutes: 300 + concurrency: + group: e2e-${{ github.ref }}-${{ matrix.profile }} + cancel-in-progress: true + strategy: + fail-fast: false + matrix: + profile: ${{ fromJson(needs.setup.outputs.profiles) }} + + env: + GO_VERSION: "1.24.6" + TMP_ROOT: ${{ github.workspace }}/ci/dvp-e2e/tmp + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Install Task + uses: arduino/setup-task@v2 + with: + version: 3.x + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Install Helm + uses: azure/setup-helm@v4.3.0 + with: + version: v3.17.2 + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + with: + version: 'latest' + + - name: Install Deckhouse CLI + env: + D8_VERSION: v0.13.2 + run: | + set -euo pipefail + echo "Installing d8 ${D8_VERSION}..." + curl -fsSL -o d8.tgz "https://deckhouse.io/downloads/deckhouse-cli/${D8_VERSION}/d8-${D8_VERSION}-linux-amd64.tar.gz" + tar -xzf d8.tgz linux-amd64/bin/d8 + mv linux-amd64/bin/d8 /usr/local/bin/d8 + chmod +x /usr/local/bin/d8 + rm -rf d8.tgz linux-amd64 + d8 --version + + - name: Install yq + run: | + echo "Installing yq..." + curl -L -o /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 + chmod +x /usr/local/bin/yq + + - name: Restore run context + uses: actions/download-artifact@v4 + with: + name: run-context-${{ needs.prepare.outputs.run_id }} + path: . 
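+      # The restored run context supplies the nested kubeconfig, SSH keys, and values generated by the prepare job; the steps below re-export that job's outputs.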
+ + - name: Export run variables + run: | + echo "RUN_ID=${{ needs.prepare.outputs.run_id }}" >> "$GITHUB_ENV" + echo "PROFILE=${{ matrix.profile }}" >> "$GITHUB_ENV" + echo "STORAGE_CLASS=${{ needs.prepare.outputs.storage_class }}" >> "$GITHUB_ENV" + echo "IMAGE_STORAGE_CLASS=${{ needs.prepare.outputs.image_storage_class }}" >> "$GITHUB_ENV" + echo "SNAPSHOT_STORAGE_CLASS=${{ needs.prepare.outputs.snapshot_storage_class }}" >> "$GITHUB_ENV" + + - name: Build parent kubeconfig from secret (tests) + shell: bash + run: | + set -euo pipefail + mkdir -p "$HOME/.kube" + cat > "$HOME/.kube/config" <> "$GITHUB_ENV" + + - name: Prepare JUnit directory + run: | + mkdir -p "${{ github.workspace }}/ci/dvp-e2e/artifacts/${{ env.RUN_ID }}" + + - name: Run E2E tests + working-directory: ci/dvp-e2e + run: | + echo "๐Ÿงช Running E2E tests for profile: ${{ matrix.profile }}" + task nested:e2e \ + TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ + VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ + GENERATED_VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/generated-values.yaml" \ + SSH_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/ssh" \ + SSH_FILE_NAME="id_ed" \ + PASSWORD_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/password.txt" \ + PASSWORD_HASH_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/password-hash.txt" \ + NAMESPACE="${{ env.RUN_ID }}" \ + DOMAIN="" \ + DEFAULT_USER="ubuntu" \ + PARENT_KUBECONFIG="${KUBECONFIG}" \ + STORAGE_PROFILE="${{ matrix.profile }}" \ + NESTED_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested" \ + NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" \ + JUNIT_PATH="${{ github.workspace }}/ci/dvp-e2e/artifacts/${{ env.RUN_ID }}/junit.xml" \ + FOCUS="" \ + SKIP="" \ + LABELS="" \ + STORAGE_CLASS="${STORAGE_CLASS}" \ + IMAGE_STORAGE_CLASS="${IMAGE_STORAGE_CLASS}" \ + SNAPSHOT_STORAGE_CLASS="${SNAPSHOT_STORAGE_CLASS}" \ + TIMEOUT="${{ inputs.timeout || '4h' }}" + + - name: Collect JUnit for this run + if: always() + run: | + JUNIT_OUT="${{ github.workspace }}/ci/dvp-e2e/artifacts/${{ env.RUN_ID }}/junit.xml" + mkdir -p "$(dirname "$JUNIT_OUT")" + if [ -f "$JUNIT_OUT" ]; then + echo "JUnit file found at $JUNIT_OUT" + else + echo "junit.xml not found at expected location $JUNIT_OUT" + fi + + - name: Collect matrix log + if: always() + run: | + mkdir -p ci/dvp-e2e/tmp/matrix-logs + LOG="ci/dvp-e2e/tmp/matrix-logs/${RUN_ID}.log" + { + echo "[START] run_id=${RUN_ID} time=$(date -Iseconds)" + echo "[FINISH] run_id=${RUN_ID} status=ok time=$(date -Iseconds)" + } >> "$LOG" + echo "โœ… Created matrix log: $LOG" + + - name: Upload test logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: logs-${{ env.RUN_ID }} + path: ci/dvp-e2e/tmp/matrix-logs + if-no-files-found: ignore + + - name: Upload JUnit report + if: always() + uses: actions/upload-artifact@v4 + with: + name: junit-${{ env.RUN_ID }} + path: ci/dvp-e2e/artifacts + if-no-files-found: ignore + + - name: Purge local artifacts and tmp + if: always() + run: | + rm -rf ci/dvp-e2e/artifacts ci/dvp-e2e/tmp || true + + # ============================================ + # 3. 
REPORT - Result aggregation + # ============================================ + report: + name: Report Results + needs: [setup, run-e2e] + if: always() + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Download matrix logs + uses: actions/download-artifact@v4 + with: + pattern: logs-* + path: ci/dvp-e2e/tmp/matrix-logs + merge-multiple: true + + - name: Download JUnit reports + if: always() + uses: actions/download-artifact@v4 + with: + pattern: junit-* + path: ci/dvp-e2e/artifacts + merge-multiple: true + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + with: + version: 'latest' + + - name: Build parent kubeconfig from secret (report) + shell: bash + run: | + set -euo pipefail + mkdir -p "$HOME/.kube" + cat > "$HOME/.kube/config" <> "$GITHUB_ENV" + + # Artifacts are downloaded above; skip duplicate downloads + + - name: Generate matrix summary + if: always() + working-directory: ci/dvp-e2e + run: | + python3 scripts/loop_matrix_summary.py \ + --profiles "${{ join(needs.setup.outputs.profiles, ',') }}" \ + --run-id-prefix "nightly" \ + --log-dir "tmp/matrix-logs" \ + --webhook-url "${{ secrets.LOOP_WEBHOOK_URL || secrets.LOOP_WEBHOOK }}" \ + --channel "${{ secrets.LOOP_CHANNEL || 'test-virtualization-loop-alerts' }}" > matrix_summary.md || true + DATE=$(date +"%Y-%m-%d") + HASH=$(head -c 16 /dev/urandom | base64 | tr -dc 'a-z0-9' | head -c 8) + kubectl apply -f - <> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Profile | Status |" >> $GITHUB_STEP_SUMMARY + echo "|---------|--------|" >> $GITHUB_STEP_SUMMARY + + # Check each profile result by scanning collected artifacts + for profile in $(echo ${{ join(needs.setup.outputs.profiles, ' ') }} | tr -d '[]",'); do + if compgen -G "ci/dvp-e2e/artifacts/*${profile}*/junit.xml" > /dev/null; then + echo "| $profile | โœ… Completed |" >> $GITHUB_STEP_SUMMARY + else + echo "| $profile | โŒ Failed/Missing |" >> $GITHUB_STEP_SUMMARY + fi + done + + # ============================================ + # 4. CLEANUP - Resource cleanup + # ============================================ + cleanup: + name: Cleanup Resources + needs: report + if: always() + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Task + uses: arduino/setup-task@v2 + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + with: + version: 'latest' + + - name: Build parent kubeconfig from secret (cleanup) + shell: bash + run: | + set -euo pipefail + mkdir -p "$HOME/.kube" + cat > "$HOME/.kube/config" <> "$GITHUB_ENV" + + - name: Cleanup test namespaces + run: | + set -euo pipefail + PREFIX="nightly-nested-e2e-" + echo "๐Ÿงน Cleaning up namespaces matching prefix '${PREFIX}'" + mapfile -t CANDIDATES < <(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep "^${PREFIX}" || true) + OURS=() + for ns in "${CANDIDATES[@]:-}"; do + [ -z "$ns" ] && continue + if kubectl -n "$ns" get deploy jump-host >/dev/null 2>&1; then + OURS+=("$ns") + fi + done + if [ "${#OURS[@]}" -eq 0 ]; then + echo "[INFO] No namespaces to delete." 
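+            # Only namespaces that also contain our jump-host deployment are treated as ours; anything else matching the prefix is left alone.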
+ else + echo "[INFO] Deleting namespaces:" + printf ' - %s\n' "${OURS[@]}" + for ns in "${OURS[@]}"; do + kubectl delete ns "$ns" --wait=false || true + done + fi + + # Ingress smoke namespaces cleanup disabled + + - name: Report cleanup results + if: always() + run: | + echo "### Cleanup Results" >> $GITHUB_STEP_SUMMARY + echo "โœ… Cleanup job completed" >> $GITHUB_STEP_SUMMARY + echo "๐Ÿงน Attempted to clean up namespaces matching 'nightly-nested-e2e-*'" >> $GITHUB_STEP_SUMMARY diff --git a/.gitignore b/.gitignore index ae343f44fd..e2d3d20306 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,12 @@ retry/ # nodejs node_modules/ package-lock.json + +# CI/E2E artifacts and local binaries (cleanup) +artifacts/ +tests/e2e/artifacts/ +kubectl +ci/dvp-e2e/tmp/ + +# Task internal state +ci/dvp-e2e/.task/ diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml new file mode 100644 index 0000000000..20005044bf --- /dev/null +++ b/ci/dvp-e2e/Taskfile.yaml @@ -0,0 +1,1160 @@ +version: "3" +dotenv: + - .env + +vars: + # Paths and defaults + TMP_ROOT: + sh: git rev-parse --show-toplevel 2>/dev/null | xargs -I{} printf "%s/ci/dvp-e2e/tmp" {} + VALUES_TEMPLATE_FILE: values.yaml + SSH_FILE_NAME: cloud + + # Charts + INFRA_CHART_PATH: ./charts/infra + CLUSTER_CONFIG_CHART_PATH: ./charts/cluster-config + +tasks: + # ------------------------------------------------------------ + # Preflight + # ------------------------------------------------------------ + default: + silent: true + desc: Check required utilities + cmds: + - | + deps=("kubectl" "jq" "yq" "docker" "helm" "htpasswd" "ssh-keygen" "curl" "d8" "openssl") + for dep in "${deps[@]}"; do + if ! command -v "$dep" >/dev/null 2>&1; then + echo "Required utility '$dep' not found!" >&2 + exit 1 + fi + done + echo "All dependencies are installed!" 
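+        # Running `task` with no arguments executes this preflight check; run it before any other target.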
+ + password-gen: + desc: Generate password (openssl + bcrypt) + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + PASSWORD_FILE: '{{ printf "%s/%s" .TMP_DIR "password.txt" }}' + PASSWORD_HASH_FILE: '{{ printf "%s/%s" .TMP_DIR "password-hash.txt" }}' + cmds: + - mkdir -p {{ .TMP_DIR }} + - openssl rand -base64 20 > {{ .PASSWORD_FILE }} + - | + pw="$(cat {{ .PASSWORD_FILE }})" + htpasswd -BinC 10 "" <<< "$pw" | cut -d: -f2 | (base64 --wrap=0 2>/dev/null || base64 -w0 2>/dev/null || base64) > {{ .PASSWORD_HASH_FILE }} + status: + - test -f "{{ .PASSWORD_FILE }}" + - test -f "{{ .PASSWORD_HASH_FILE }}" + + ssh-gen: + desc: Generate ssh keypair for jump-host + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' + SSH_PUB_KEY_FILE: '{{ printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' + cmds: + - mkdir -p "{{ .SSH_DIR }}" + - ssh-keygen -t ed25519 -o -a 64 -N "" -C "cloud" -f {{ .SSH_PRIV_KEY_FILE }} -q + - chmod 0600 "{{ .SSH_PRIV_KEY_FILE }}" + - chmod 0644 "{{ .SSH_PUB_KEY_FILE }}" + status: + - test -f "{{ .SSH_PRIV_KEY_FILE }}" + + # ------------------------------------------------------------ + # Values per run (namespaces, domain, prefix) + # ------------------------------------------------------------ + run:values:prepare: + desc: Prepare values.yaml for the run + vars: + RUN_ID: "{{ .RUN_ID }}" + RUN_NAMESPACE: "{{ .RUN_NAMESPACE }}" + RUN_DIR: '{{ .RUN_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}' + TARGET_VALUES_FILE: '{{ printf "%s/%s" .RUN_DIR "values.yaml" }}' + BASE_DOMAIN: + sh: yq eval '.domain // ""' {{ .VALUES_TEMPLATE_FILE }} + BASE_CLUSTER_PREFIX: + sh: yq eval '.clusterConfigurationPrefix // "cluster"' {{ .VALUES_TEMPLATE_FILE }} + cmds: + - mkdir -p {{ .RUN_DIR }} + - cp {{ .VALUES_TEMPLATE_FILE }} {{ .TARGET_VALUES_FILE }} + - yq eval --inplace '.namespace = "{{ .RUN_NAMESPACE }}"' {{ .TARGET_VALUES_FILE }} + - | + set -euo pipefail + DOMAIN_INPUT="{{ .BASE_DOMAIN }}" + if [ -n "$DOMAIN_INPUT" ]; then + DOMAIN_VAL="{{ .RUN_ID }}.$DOMAIN_INPUT" + else + DOMAIN_VAL="{{ .RUN_ID }}" + fi + export DOMAIN_VAL + yq eval --inplace '.domain = strenv(DOMAIN_VAL)' {{ .TARGET_VALUES_FILE }} + - | + set -euo pipefail + if command -v shasum >/dev/null 2>&1; then + RUN_ID_HASH=$(printf "%s" "{{ .RUN_ID }}" | shasum | awk '{print $1}' | cut -c1-6) + else + RUN_ID_HASH=$(printf "%s" "{{ .RUN_ID }}" | sha1sum 2>/dev/null | awk '{print $1}' | cut -c1-6) + fi + PREFIX_INPUT="{{ .BASE_CLUSTER_PREFIX }}-${RUN_ID_HASH}" + [ ${#PREFIX_INPUT} -gt 16 ] && PREFIX_INPUT="${PREFIX_INPUT:0:16}" + export PREFIX_INPUT + yq eval --inplace '.clusterConfigurationPrefix = strenv(PREFIX_INPUT)' {{ .TARGET_VALUES_FILE }} + + # ------------------------------------------------------------ + # Infra manifests and deployment + # ------------------------------------------------------------ + render-infra: + desc: Generate infra manifests + deps: + - task: ssh:ensure + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + SSH_FILE_NAME: "{{ .SSH_FILE_NAME }}" + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + GENERATED_VALUES_FILE: '{{ printf "%s/%s" .TMP_DIR "generated-values.yaml" }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PUB_KEY_FILE: '{{ 
printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' + DOMAIN: + sh: yq eval '.domain // ""' {{ .VALUES_FILE }} + sources: + - "./charts/infra/**/*" + - "{{ .VALUES_FILE }}" + generates: + - "{{ .TMP_DIR }}/infra.yaml" + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - mkdir -p {{ .TMP_DIR }} + - printf "" > {{ .GENERATED_VALUES_FILE }} + - | + export SSH_PUB_KEY="$(cat {{ .SSH_PUB_KEY_FILE }})" + yq eval --inplace '.sshPublicKey = env(SSH_PUB_KEY)' {{ .GENERATED_VALUES_FILE }} + - | + DOMAIN_VALUE="{{ .DOMAIN }}" + if [ -n "$DOMAIN_VALUE" ] && [ "$DOMAIN_VALUE" != "null" ]; then + export DOMAIN_VALUE + yq eval --inplace '.domain = env(DOMAIN_VALUE)' {{ .GENERATED_VALUES_FILE }} + fi + - helm template dvp-over-dvp-infra {{ .INFRA_CHART_PATH }} -f {{ .VALUES_FILE }} -f {{ .GENERATED_VALUES_FILE }} > {{ .TMP_DIR }}/infra.yaml + + infra-deploy: + desc: Deploy infra (Namespace/RBAC/Jump-host) + deps: + - task: render-infra + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default "" }}' + SSH_FILE_NAME: "{{ .SSH_FILE_NAME }}" + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' + SSH_PUB_KEY_FILE: '{{ printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - kubectl apply --validate=false -f {{ .TMP_DIR }}/infra.yaml + - kubectl -n {{ .NAMESPACE }} wait --for=condition=Ready pod -l app=jump-host --timeout=300s + - | + # Persist SSH keypair in parent cluster namespace for diagnostics tools (nested_diag.sh) + # Secret contains private and public parts; will be removed with namespace cleanup + kubectl -n {{ .NAMESPACE }} create secret generic e2e-ssh-key \ + --dry-run=client -o yaml \ + --from-file=cloud={{ .SSH_PRIV_KEY_FILE }} \ + --from-file=cloud.pub={{ .SSH_PUB_KEY_FILE }} \ + | kubectl apply -f - + + infra:create-storage-disks: + desc: Create storage disks for worker VMs before cluster bootstrap (for SDS/Ceph OSD) + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' + STORAGE_CLASS: '{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' + DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' + WORKER_COUNT: '{{ .WORKER_COUNT | default "3" }}' + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + echo "[INFRA] Creating {{ .DISK_COUNT }} storage disks per worker VM ({{ .WORKER_COUNT }} workers) in namespace {{ .NAMESPACE }}" + + # Create VirtualDisks for all expected worker VMs + # We'll use predictable naming based on Deckhouse's naming pattern + for worker_idx in $(seq 0 $(({{ .WORKER_COUNT }} - 1))); do + for disk_num in $(seq 1 {{ .DISK_COUNT }}); do + # Deckhouse generates VM names like: {prefix}-{hash}-worker-{suffix} + vd="storage-disk-${disk_num}-worker-${worker_idx}" + echo "[INFRA] Creating VirtualDisk $vd ({{ .DISK_SIZE }}, sc={{ .STORAGE_CLASS }})" + 
cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml + echo "[INFRA] VirtualDisk $vd created" + done + done + + infra:attach-storage-disks-hotplug: + desc: Attach storage disks to worker VMs using hotplug (VirtualMachineBlockDeviceAttachment) + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' + STORAGE_CLASS: '{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' + DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + # Enable shell tracing when DEBUG_HOTPLUG is set + [ -n "${DEBUG_HOTPLUG:-}" ] && set -x || true + echo "[INFRA] Attaching {{ .DISK_COUNT }} storage disks to worker VMs using hotplug in namespace {{ .NAMESPACE }}" + + # Wait for worker VMs + for i in $(seq 1 50); do + worker_count=$(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker | wc -l) + if [ "$worker_count" -gt 0 ]; then + echo "[INFRA] Found $worker_count worker VMs"; break + fi + echo "[INFRA] Waiting for worker VMs... ($i/50)"; sleep 10 + done + + workers=() + while IFS= read -r line; do + [ -n "$line" ] && workers+=("$line") + done < <(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker || true) + + if [ ${#workers[@]} -eq 0 ]; then + echo "[INFRA] No worker VMs found; nothing to do"; exit 0 + fi + + echo "[INFRA] Found ${#workers[@]} worker VMs: ${workers[*]}" + + for vm in "${workers[@]}"; do + [ -z "$vm" ] && continue + echo "[INFRA] Processing VM: $vm" + + # Wait for VM to be Running + for i in $(seq 1 50); do + phase=$(kubectl -n {{ .NAMESPACE }} get vm "$vm" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$phase" = "Running" ]; then + echo "[INFRA] VM $vm is Running"; break + fi + echo "[INFRA] VM $vm phase=$phase; retry $i/50"; sleep 10 + done + + for disk_num in $(seq 1 {{ .DISK_COUNT }}); do + vd="storage-disk-${disk_num}-$vm" + echo "[INFRA] Creating VirtualDisk $vd ({{ .DISK_SIZE }}, sc={{ .STORAGE_CLASS }})" + cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml + + # Wait for VirtualDisk to be Ready and PVC to be Bound + echo "[INFRA] Waiting for VirtualDisk $vd to be Ready..." + vd_phase="" + for j in $(seq 1 50); do + vd_phase=$(kubectl -n {{ .NAMESPACE }} get vd "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$vd_phase" = "Ready" ]; then + echo "[INFRA] VirtualDisk $vd is Ready"; break + fi + echo "[INFRA] VD $vd phase=$vd_phase; retry $j/50"; sleep 5 + done + if [ "$vd_phase" != "Ready" ]; then + echo "[ERROR] VirtualDisk $vd not Ready" + kubectl -n {{ .NAMESPACE }} get vd "$vd" -o yaml || true + kubectl -n {{ .NAMESPACE }} get events --sort-by=.lastTimestamp | tail -n 100 || true + exit 1 + fi + + pvc_name="" + for j in $(seq 1 30); do + pvc_name=$(kubectl -n {{ .NAMESPACE }} get vd "$vd" -o jsonpath='{.status.target.persistentVolumeClaimName}' 2>/dev/null || true) + [ -n "$pvc_name" ] && break + echo "[INFRA] Waiting for PVC name for VD $vd; retry $j/30"; sleep 3 + done + if [ -n "$pvc_name" ]; then + echo "[INFRA] Waiting PVC $pvc_name to reach phase=Bound..." 
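+            # Poll the PVC for up to ~4 minutes (120 attempts x 2s) until it reports phase=Bound.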
+ pvc_phase="" + for j in $(seq 1 120); do + pvc_phase=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$pvc_phase" = "Bound" ]; then + break + fi + echo "[INFRA] PVC $pvc_name phase=$pvc_phase; retry $j/120"; sleep 2 + done + if [ "$pvc_phase" != "Bound" ]; then + echo "[ERROR] PVC $pvc_name did not reach Bound" + kubectl -n {{ .NAMESPACE }} describe pvc "$pvc_name" || true + kubectl -n {{ .NAMESPACE }} get events --sort-by=.lastTimestamp | tail -n 100 || true + exit 1 + fi + sc=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.storageClassName}' 2>/dev/null || true) + pv=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.volumeName}' 2>/dev/null || true) + vmode=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.volumeMode}' 2>/dev/null || true) + echo "[INFRA] PVC $pvc_name is Bound (sc=${sc:-?}, pv=${pv:-?}, mode=${vmode:-?})" + else + echo "[WARN] PVC name for VD $vd is empty; proceeding with attachment" + fi + + echo "[INFRA] Creating VirtualMachineBlockDeviceAttachment for $vd" + cat > /tmp/attach-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/attach-$vd.yaml + + echo "[INFRA] Waiting for hotplug attachment of $vd..." + success_by_vm=0 + for i in $(seq 1 30); do + phase=$(kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$phase" = "Attached" ]; then + echo "[INFRA] Disk $vd successfully attached to VM $vm"; break + fi + # Quick success path: rely on VM status even if VMBDA still InProgress + if kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ + | jq -e --arg vd "$vd" '([.status.blockDeviceRefs[]? | select((.virtualMachineBlockDeviceAttachmentName==$vd) or (.name==$vd)) | select((.attached==true) and (.hotplugged==true))] | length) > 0' >/dev/null; then + echo "[INFRA] VM reports disk $vd attached/hotplugged; proceeding" + success_by_vm=1 + break + fi + + # Print status approximately every 30 seconds (poll interval is 5s) + if [ $((i % 6)) -eq 0 ]; then + echo "[INFRA] Disk $vd phase=$phase; retry $i/30" + fi + sleep 5 + + # Minimal periodic debug snapshot approximately every 60 seconds + if [ $((i % 12)) -eq 0 ]; then + echo "[DEBUG] VMBDA $vd summary:" + kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json \ + | jq -r '{phase: .status.phase, conditions: (.status.conditions // []) | map({type, status, reason, message})}' || true + echo "[DEBUG] VM $vm block devices (summary):" + kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ + | jq -r '{phase: .status.phase, blockDeviceRefs: (.status.blockDeviceRefs // []) | map({name, virtualMachineBlockDeviceAttachmentName, attached, hotplugged})}' || true + fi + done + + if [ "$phase" != "Attached" ] && [ "${success_by_vm:-0}" -ne 1 ]; then + echo "[ERROR] Disk $vd failed to attach to VM $vm within timeout" >&2 + echo "[DEBUG] Final VMBDA summary:" + kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json \ + | jq -r '{phase: .status.phase, conditions: (.status.conditions // []) | map({type, status, reason, message})}' || true + echo "[DEBUG] VM $vm block devices (summary):" + kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ + | jq -r '{phase: .status.phase, blockDeviceRefs: (.status.blockDeviceRefs // []) | map({name, virtualMachineBlockDeviceAttachmentName, attached, hotplugged})}' || true + exit 1 + fi + done + + echo "[INFRA] VM $vm configured with hotplug 
disks" + done + + echo "[INFRA] All worker VMs configured with storage disks via hotplug" + + infra:attach-worker-disks: + desc: Attach additional data disks to worker VMs (for SDS/Ceph OSD) + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' + STORAGE_CLASS: '{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' + DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + echo "[INFRA] Attaching {{ .DISK_COUNT }} storage disks to worker VMs in namespace {{ .NAMESPACE }}" + workers=() + while IFS= read -r line; do + [ -n "$line" ] && workers+=("$line") + done < <(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker || true) + if [ ${#workers[@]} -eq 0 ]; then + echo "[INFRA] No worker VMs found"; exit 0 + fi + for vm in "${workers[@]}"; do + [ -z "$vm" ] && continue + for disk_num in $(seq 1 {{ .DISK_COUNT }}); do + vd="storage-disk-${disk_num}-$vm" + cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml + cat > /tmp/attach-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/attach-$vd.yaml + + echo "[INFRA] Waiting for hotplug attachment of $vd..." + for i in $(seq 1 30); do + phase=$(kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$phase" = "Attached" ]; then + echo "[INFRA] Disk $vd successfully attached to VM $vm"; break + fi + # Print status approximately every 30 seconds + if [ $((i % 6)) -eq 0 ]; then + echo "[INFRA] Disk $vd phase=$phase; retry $i/30" + fi + sleep 5 + + # Periodic debug snapshot approximately every 60 seconds + if [ $((i % 12)) -eq 0 ]; then + echo "[DEBUG] VMBDA $vd status:" + kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json | jq -r '.status' || true + fi + done + + if [ "$phase" != "Attached" ]; then + # Fallback on VM events confirming successful hotplug + echo "[DEBUG] Checking VM events for hotplug success fallback..." 
+ if kubectl -n {{ .NAMESPACE }} get events \ + --field-selector involvedObject.kind=VirtualMachine,involvedObject.name="$vm" \ + --sort-by=.lastTimestamp -ojson \ + | jq -r '.items[].message' 2>/dev/null \ + | grep -q -E "Successfully attach hotplugged volume.*\b$vd\b"; then + echo "[WARN] VMBDA phase not Attached, but VM reported success; treating as Attached (fallback)" + else + echo "[ERROR] Disk $vd failed to attach to VM $vm" >&2 + echo "[DEBUG] Final VMBDA status:" + kubectl -n {{ .NAMESPACE }} describe virtualmachineblockdeviceattachment "$vd" || true + # Filter controller/handler logs by our namespace/VM/VD + kubectl -n d8-virtualization logs deploy/virtualization-controller --tail=200 2>/dev/null | grep -E "{{ .NAMESPACE }}|$vm|$vd" || true + for h in $(kubectl -n d8-virtualization get pods -l app=virt-handler -o name 2>/dev/null || true); do + kubectl -n d8-virtualization logs --tail=200 "$h" | grep -E "{{ .NAMESPACE }}|$vm|$vd" || true + done + exit 1 + fi + fi + done + done + + # ------------------------------------------------------------ + # Kubeconfig for bootstrap and cluster config + # ------------------------------------------------------------ + render-kubeconfig: + desc: Generate kubeconfig for bootstrap + deps: + - password-gen + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + SERVER: + sh: | + HOST=$(kubectl -n d8-user-authn get ingress kubernetes-api -o json | jq -r '.spec.rules[0].host') + [ -z "$HOST" -o "$HOST" = "null" ] && { echo "[ERR] kubernetes-api ingress host not found" >&2; exit 1; } + echo "https://$HOST" + TOKEN: + sh: | + for i in $(seq 1 5); do + TOKEN=$(kubectl -n {{ .NAMESPACE }} create token dkp-sa --duration=10h 2>/dev/null) && break + echo "[WARN] Failed to issue SA token (attempt $i); retrying in 3s" >&2 + sleep 3 + done + [ -z "${TOKEN:-}" ] && { echo "[ERR] Unable to obtain token for dkp-sa" >&2; exit 1; } + echo "$TOKEN" + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + silent: true + cmds: + - mkdir -p {{ .TMP_DIR }} + - | + cat < {{ .TMP_DIR }}/kubeconfig.yaml + apiVersion: v1 + clusters: + - cluster: + server: {{ .SERVER }} + insecure-skip-tls-verify: true + name: dvp + contexts: + - context: + cluster: dvp + namespace: {{ .NAMESPACE }} + user: {{ .NAMESPACE }}@dvp + name: {{ .NAMESPACE }}@dvp + current-context: {{ .NAMESPACE }}@dvp + kind: Config + preferences: {} + users: + - name: {{ .NAMESPACE }}@dvp + user: + token: {{ .TOKEN }} + EOF + + render-cluster-config: + desc: Generate cluster config (helm template) + deps: + - render-kubeconfig + - password-gen + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + GENERATED_VALUES_FILE: '{{ printf "%s/%s" .TMP_DIR "generated-values.yaml" }}' + PASSWORD_HASH_FILE: '{{ printf "%s/%s" .TMP_DIR "password-hash.txt" }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PUB_KEY_FILE: '{{ printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' + cmds: + - printf "" > {{ .GENERATED_VALUES_FILE }} + - | + export PASSWORD_HASH="$(cat {{ .PASSWORD_HASH_FILE }})" + yq eval --inplace '.passwordHash = env(PASSWORD_HASH)' {{ .GENERATED_VALUES_FILE }} + - | + export NEW_KUBECONFIG_B64="$(cat {{ .TMP_DIR }}/kubeconfig.yaml | base64 | tr -d '\n')" + yq eval --inplace '.kubeconfigDataBase64 = 
env(NEW_KUBECONFIG_B64)' {{ .GENERATED_VALUES_FILE }} + - | + if [ -n "{{ .TARGET_STORAGE_CLASS | default "" }}" ]; then + export _SC='{{ .TARGET_STORAGE_CLASS }}' + yq eval --inplace '.storageClass = env(_SC)' {{ .GENERATED_VALUES_FILE }} + yq eval --inplace '.storageClasses.controlPlane.root = env(_SC)' {{ .GENERATED_VALUES_FILE }} + yq eval --inplace '.storageClasses.controlPlane.etcd = env(_SC)' {{ .GENERATED_VALUES_FILE }} + yq eval --inplace '.storageClasses.workers.root = env(_SC)' {{ .GENERATED_VALUES_FILE }} + yq eval --inplace '.storageClasses.workers.data = env(_SC)' {{ .GENERATED_VALUES_FILE }} + fi + - | + export SSH_PUB_KEY="$(cat {{ .SSH_PUB_KEY_FILE }})" + yq eval --inplace '.sshPublicKey = env(SSH_PUB_KEY)' {{ .GENERATED_VALUES_FILE }} + - helm template dvp-over-dvp-cluster-config {{ .CLUSTER_CONFIG_CHART_PATH }} -f {{ .VALUES_FILE }} -f {{ .GENERATED_VALUES_FILE }} > {{ .TMP_DIR }}/config.yaml + + dhctl-bootstrap: + desc: Bootstrap Deckhouse over DVP + deps: + - render-cluster-config + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + DEFAULT_USER: + sh: yq eval '.image.defaultUser' {{ .VALUES_FILE }} + JUMPHOST_EXT_IP: + sh: kubectl -n {{ .NAMESPACE }} exec -it deployment/jump-host -- dig @resolver4.opendns.com myip.opendns.com +short | tr -d '\r' + JUMPHOST_NODEPORT: + sh: kubectl -n {{ .NAMESPACE }} get svc jump-host -o json | jq '.spec.ports[] | select(.port==2222) | .nodePort' + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + IMAGE="dev-registry.deckhouse.io/sys/deckhouse-oss/install:main" + docker pull --platform=linux/amd64 "$IMAGE" + docker run --rm --platform=linux/amd64 \ + -v "{{ .TMP_DIR }}:/work" \ + "$IMAGE" \ + dhctl bootstrap \ + --config=/work/config.yaml \ + --ssh-agent-private-keys=/work/ssh/{{ .SSH_FILE_NAME }} \ + --ssh-user={{ .DEFAULT_USER }} \ + --ssh-bastion-port={{ .JUMPHOST_NODEPORT }} \ + --ssh-bastion-host={{ .JUMPHOST_EXT_IP }} \ + --ssh-bastion-user=user \ + --preflight-skip-availability-ports-check \ + --preflight-skip-deckhouse-user-check \ + --preflight-skip-registry-credential \ + --preflight-skip-deckhouse-edition-check \ + {{.CLI_ARGS}} + + # ------------------------------------------------------------ + # SSH Keys management (use GH keys or generate new ones) + # ------------------------------------------------------------ + ssh:import-gh: + desc: Download predefined SSH keys from deckhouse/virtualization repo + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_FILE_NAME: '{{ .SSH_FILE_NAME | default "id_ed" }}' + GH_RAW_URL_PRIV: 'https://raw.githubusercontent.com/deckhouse/virtualization/main/test/e2e/legacy/testdata/sshkeys/id_ed' + GH_RAW_URL_PUB: 'https://raw.githubusercontent.com/deckhouse/virtualization/main/test/e2e/legacy/testdata/sshkeys/id_ed.pub' + cmds: + - mkdir -p {{ .SSH_DIR }} + - curl -fsSL {{ .GH_RAW_URL_PRIV }} -o {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }} + - curl -fsSL {{ .GH_RAW_URL_PUB }} -o {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}.pub + - chmod 0600 {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }} + - chmod 0644 {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}.pub + status: + 
- test -f "{{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}" + + ssh:ensure: + desc: Ensure SSH keys exist (import from GH when USE_GH_SSH_KEYS=true) + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_FILE_NAME: '{{ .SSH_FILE_NAME | default (env "SSH_FILE_NAME") | default "cloud" }}' + USE_GH_SSH_KEYS: '{{ .USE_GH_SSH_KEYS | default (env "USE_GH_SSH_KEYS") | default "false" }}' + cmds: + - | + set -euo pipefail + if [ "{{ .USE_GH_SSH_KEYS }}" = "true" ]; then + echo "[SSH] Importing GH keys to {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}" + task ssh:import-gh SSH_DIR='{{ .SSH_DIR }}' SSH_FILE_NAME='{{ .SSH_FILE_NAME }}' + else + echo "[SSH] Generating new SSH keypair at {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}" + task ssh-gen SSH_DIR='{{ .SSH_DIR }}' SSH_FILE_NAME='{{ .SSH_FILE_NAME }}' + fi + + # ------------------------------------------------------------ + # Local flow wrappers with logs (DVP-over-DVP) + # ------------------------------------------------------------ + local:bootstrap: + desc: Local flow โ€” deploy infra + bootstrap nested (logs saved) + vars: + RUN_ID: '{{ .RUN_ID | default (printf "local-%s" (now | date "20060102-150405")) }}' + RUN_NAMESPACE: '{{ .RUN_NAMESPACE | default (printf "dvp-e2e-local-%s" .RUN_ID) }}' + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}' + LOG_FILE: '{{ .LOG_FILE | default (printf "%s/%s" .TMP_DIR "bootstrap.log") }}' + VALUES_FILE: '{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}' + TARGET_STORAGE_CLASS: '{{ .TARGET_STORAGE_CLASS | default "ceph-pool-r2-csi-rbd-immediate" }}' + USE_GH_SSH_KEYS: '{{ .USE_GH_SSH_KEYS | default (env "USE_GH_SSH_KEYS") | default "true" }}' + cmds: + - mkdir -p {{ .TMP_DIR }} + - | + set -euo pipefail + echo "[FLOW] Using RUN_ID={{ .RUN_ID }}, namespace={{ .RUN_NAMESPACE }}" + { + task run:values:prepare RUN_ID='{{ .RUN_ID }}' RUN_NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' + task render-infra VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' USE_GH_SSH_KEYS='{{ .USE_GH_SSH_KEYS }}' SSH_FILE_NAME='id_ed' + task infra-deploy VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' SSH_FILE_NAME='id_ed' + task render-cluster-config VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' TARGET_STORAGE_CLASS='{{ .TARGET_STORAGE_CLASS }}' SSH_FILE_NAME='id_ed' + task dhctl-bootstrap VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' SSH_FILE_NAME='id_ed' + } 2>&1 | tee '{{ .LOG_FILE }}' + + local:tests: + desc: Local flow โ€” prepare nested kubeconfig and run E2E (logs saved) + vars: + RUN_ID: '{{ .RUN_ID | default (printf "local-%s" (now | date "20060102-150405")) }}' + RUN_NAMESPACE: '{{ .RUN_NAMESPACE | default (printf "dvp-e2e-local-%s" .RUN_ID) }}' + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}' + LOG_FILE: '{{ .LOG_FILE | default (printf "%s/%s" .TMP_DIR "tests.log") }}' + E2E_DIR: '{{ .E2E_DIR | default (env "E2E_DIR") | default "../../tests/e2e" }}' + NESTED_SC: '{{ .NESTED_SC | default "ceph-pool-r2-csi-rbd-immediate" }}' + cmds: + - mkdir -p {{ .TMP_DIR }} + - | + set -euo pipefail + { + task nested:kubeconfig NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' + task nested:storage:ceph NESTED_KUBECONFIG='{{ .TMP_DIR }}/nested-{{ .RUN_NAMESPACE }}/kubeconfig' TARGET_SC='{{ .NESTED_SC }}' + task nested:ensure-sc NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' SC_NAME='{{ .NESTED_SC }}' + task nested:ensure-vmclass-default 
NESTED_KUBECONFIG='{{ .TMP_DIR }}/nested-{{ .RUN_NAMESPACE }}/kubeconfig' + task nested:e2e NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' E2E_DIR='{{ .E2E_DIR }}' + } 2>&1 | tee '{{ .LOG_FILE }}' + + # ------------------------------------------------------------ + # Nested cluster helpers (SC + kubeconfig) + # ------------------------------------------------------------ + nested:kubeconfig: + desc: Build kubeconfig for nested cluster via jump-host + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: "{{ .NAMESPACE }}" + DOMAIN: + sh: yq eval '.domain // ""' {{ .VALUES_FILE }} + DEFAULT_USER: + sh: yq eval '.image.defaultUser' {{ .VALUES_FILE }} + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' + NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' + NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' + PARENT_KUBECONFIG_PATH: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + if [ ! -s "{{ .PARENT_KUBECONFIG_PATH }}" ]; then + echo "[ERR] parent kubeconfig not found at {{ .PARENT_KUBECONFIG_PATH }}" + exit 1 + fi + mkdir -p {{ .NESTED_DIR }} + MASTER_NAME=$(KUBECONFIG={{ .PARENT_KUBECONFIG_PATH }} kubectl -n {{ .NAMESPACE }} get vm -l dvp.deckhouse.io/node-group=master -o jsonpath='{.items[0].metadata.name}') + if [ -z "$MASTER_NAME" ]; then + echo "[ERR] master VM not found in namespace {{ .NAMESPACE }}" >&2 + exit 1 + fi + TOKEN_FILE="{{ .NESTED_DIR }}/token.txt" + rm -f "$TOKEN_FILE" + SSH_OK=0 + for attempt in $(seq 1 6); do + if KUBECONFIG={{ .PARENT_KUBECONFIG_PATH }} d8 v ssh --username={{ .DEFAULT_USER }} --identity-file={{ .SSH_PRIV_KEY_FILE }} --local-ssh=true --local-ssh-opts="-o StrictHostKeyChecking=no" --local-ssh-opts="-o UserKnownHostsFile=/dev/null" "${MASTER_NAME}.{{ .NAMESPACE }}" -c ' + set -euo pipefail + SUDO="sudo /opt/deckhouse/bin/kubectl" + $SUDO -n kube-system get sa e2e-admin >/dev/null 2>&1 || $SUDO -n kube-system create sa e2e-admin >/dev/null 2>&1 + $SUDO -n kube-system get clusterrolebinding e2e-admin >/dev/null 2>&1 || $SUDO -n kube-system create clusterrolebinding e2e-admin --clusterrole=cluster-admin --serviceaccount=kube-system:e2e-admin >/dev/null 2>&1 + for i in $(seq 1 10); do + TOKEN=$($SUDO -n kube-system create token e2e-admin --duration=24h 2>/dev/null) && echo "$TOKEN" && break + echo "[WARN] Failed to create token (attempt $i/10); retrying in 3s" >&2 + sleep 3 + done + if [ -z "${TOKEN:-}" ]; then + echo "[ERR] Unable to create token for e2e-admin after 10 attempts" >&2 + exit 1 + fi + ' > "$TOKEN_FILE"; then + SSH_OK=1 + break + fi + echo "[WARN] d8 v ssh attempt $attempt failed; retry in 15s..." + sleep 15 + done + if [ "$SSH_OK" -ne 1 ] || [ ! 
-s "$TOKEN_FILE" ]; then + echo "[ERR] Failed to obtain nested token via d8 v ssh after multiple attempts" >&2 + cat "$TOKEN_FILE" 2>/dev/null || true + exit 1 + fi + NESTED_TOKEN=$(cat {{ .NESTED_DIR }}/token.txt) + SERVER_URL="https://api.{{ .NAMESPACE }}.{{ .DOMAIN }}" + { + printf 'apiVersion: v1\n' + printf 'kind: Config\n' + printf 'clusters:\n' + printf '- cluster:\n' + printf ' insecure-skip-tls-verify: true\n' + printf ' server: %s\n' "${SERVER_URL}" + printf ' name: nested\n' + printf 'contexts:\n' + printf '- context:\n' + printf ' cluster: nested\n' + printf ' user: e2e-admin\n' + printf ' name: nested\n' + printf 'current-context: nested\n' + printf 'users:\n' + printf '- name: e2e-admin\n' + printf ' user:\n' + printf ' token: %s\n' "${NESTED_TOKEN}" + } > {{ .NESTED_KUBECONFIG }} + chmod 600 {{ .NESTED_KUBECONFIG }} + echo "Generated nested kubeconfig at {{ .NESTED_KUBECONFIG }}" + + nested:ensure-sc: + desc: Ensure StorageClass exists in nested cluster + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + NAMESPACE: "{{ .NAMESPACE }}" + NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' + NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' + SC_NAME: '{{ .SC_NAME | default "linstor-thin-r2" }}' + cmds: + - | + set -euo pipefail + if ! KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get sc "{{ .SC_NAME }}" >/dev/null 2>&1; then + echo "[ERR] StorageClass '{{ .SC_NAME }}' is missing in nested cluster" + exit 1 + fi + + nested:ensure-vmclass-default: + desc: Ensure default VMClass generic-for-e2e exists in nested cluster + vars: + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + cmds: + - | + set -euo pipefail + for i in $(seq 1 18); do + if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get vmclass generic >/dev/null 2>&1; then + KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get vmclass generic -o json \ + | jq 'del(.status) | .metadata={"name":"generic-for-e2e","annotations":{"virtualmachineclass.virtualization.deckhouse.io/is-default-class":"true"}}' \ + | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - >/dev/null + break + fi + echo "[INFO] Waiting for vmclass/generic to appear (attempt $i)..." + sleep 10 + done + + nested:storage:configure: + desc: Configure storage profile inside nested cluster (Ceph or SDS) + vars: + STORAGE_PROFILE: '{{ .STORAGE_PROFILE | default "ceph" }}' + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + TARGET_STORAGE_CLASS: '{{ .TARGET_STORAGE_CLASS }}' + STORAGE_PROFILE_NORMALIZED: + sh: | + case '{{ .STORAGE_PROFILE }}' in + sds|sds-local|sds_local|sds-replicated|sds_replicated) echo sds ;; + ceph|ceph-rbd|cephrbd) echo ceph ;; + *) echo '{{ .STORAGE_PROFILE }}' ;; + esac + cmds: + - cmd: 'echo "[STORAGE] normalized profile = {{ .STORAGE_PROFILE_NORMALIZED }}"' + - | + set -euo pipefail + case '{{ .STORAGE_PROFILE_NORMALIZED }}' in + ceph|sds) ;; + *) echo "Unknown storage profile: {{ .STORAGE_PROFILE }}" >&2; exit 1 ;; + esac + - | + set -euo pipefail + case '{{ .STORAGE_PROFILE_NORMALIZED }}' in + ceph) + echo "[CEPH] Configuring Ceph storage via Taskfile..." + ;; + sds) + echo "[SDS] Configuring SDS storage..." 
+ ;; + esac + - | + set -euo pipefail + case '{{ .STORAGE_PROFILE_NORMALIZED }}' in + ceph) + task nested:storage:ceph \ + NESTED_KUBECONFIG='{{ .NESTED_KUBECONFIG }}' \ + TARGET_SC='{{ .TARGET_STORAGE_CLASS }}' + ;; + sds) + task nested:storage:sds \ + NESTED_KUBECONFIG='{{ .NESTED_KUBECONFIG }}' \ + SDS_SC_NAME='{{ .TARGET_STORAGE_CLASS }}' + ;; + esac + + nested:storage:sds: + desc: Configure SDS storage profile in nested cluster + vars: + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + SDS_SC_NAME: '{{ .SDS_SC_NAME | default "linstor-thin-r2" }}' + SDS_DVCR_SIZE: '{{ .SDS_DVCR_SIZE | default "5Gi" }}' + cmds: + - | + set -euo pipefail + echo "[SDS] Enabling SDS modules (node-configurator, replicated-volume)..." + success=0 + for attempt in $(seq 1 5); do + if { + yq eval -n ' + .apiVersion = "deckhouse.io/v1alpha1" | + .kind = "ModuleConfig" | + .metadata.name = "sds-node-configurator" | + .spec.enabled = true | + .spec.version = 1 | + .spec.source = "deckhouse-prod" + '; + echo '---'; + yq eval -n ' + .apiVersion = "deckhouse.io/v1alpha1" | + .kind = "ModuleConfig" | + .metadata.name = "sds-replicated-volume" | + .spec.enabled = true | + .spec.version = 1 | + .spec.source = "deckhouse-prod" + '; + } | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply --validate=false -f -; then + success=1 + break + fi + echo "[SDS] ModuleConfig apply failed (attempt $attempt); retrying in 10s..." + sleep 10 + done + if [ "$success" -ne 1 ]; then + echo "[ERR] Unable to enable SDS modules after retries" >&2 + exit 1 + fi + - | + set -euo pipefail + echo "[SDS] Waiting for SDS CRDs to be established..." + for crd in lvmvolumegroups.storage.deckhouse.io replicatedstoragepools.storage.deckhouse.io replicatedstorageclasses.storage.deckhouse.io; do + echo "[SDS] Waiting for CRD '$crd'..." + found=0 + for i in $(seq 1 50); do + if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get crd "$crd" >/dev/null 2>&1; then + found=1 + break + fi + echo "[SDS] CRD '$crd' not found yet, retry $i/50"; sleep 5 + done + if [ "$found" -eq 1 ]; then + KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl wait --for=condition=Established --timeout=180s crd "$crd" || true + else + echo "[WARN] CRD '$crd' not found after waiting" >&2 + fi + done + - | + set -euo pipefail + echo "[SDS] Creating per-node LVMVolumeGroups (type=Local)..." + NODES=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes -o json \ + | jq -r '.items[] | select(.metadata.labels["node-role.kubernetes.io/control-plane"]!=true and .metadata.labels["node-role.kubernetes.io/master"]!=true) | .metadata.name') + if [ -z "$NODES" ]; then + NODES=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes -o json | jq -r '.items[].metadata.name') + fi + for node in $NODES; do + [ -z "$node" ] && continue + yq eval -n " + .apiVersion = \"storage.deckhouse.io/v1alpha1\" | + .kind = \"LVMVolumeGroup\" | + .metadata.name = \"data-\" + env(NODE) | + .spec.type = \"Local\" | + .spec.local.nodeName = env(NODE) | + .spec.local.actualVGNameOnTheNode = \"data\" | + .spec.blockDeviceSelector.devicePaths = [\"/dev/sdb\",\"/dev/vdb\",\"/dev/xvdb\",\"/dev/sdc\",\"/dev/vdc\",\"/dev/xvdc\",\"/dev/sdd\",\"/dev/vdd\",\"/dev/xvdd\"] + " | NODE="$node" KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - + done + echo "[SDS] Creating ReplicatedStoragePool 'data' from LVMVolumeGroups..." 
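+          # The block below aggregates every per-node LVMVolumeGroup created above into a
+          # single pool. For example nodes worker-0..worker-2 (names are illustrative), the
+          # generated manifest would look roughly like:
+          #   apiVersion: storage.deckhouse.io/v1alpha1
+          #   kind: ReplicatedStoragePool
+          #   metadata:
+          #     name: data
+          #   spec:
+          #     type: LVM
+          #     lvmVolumeGroups:
+          #       - name: data-worker-0
+          #       - name: data-worker-1
+          #       - name: data-worker-2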
+ LVGS=$(printf "%s\n" $NODES | sed 's/^/ - name: data-/') + { + echo "apiVersion: storage.deckhouse.io/v1alpha1" + echo "kind: ReplicatedStoragePool" + echo "metadata:" + echo " name: data" + echo "spec:" + echo " type: LVM" + echo " lvmVolumeGroups:" + printf "%s\n" "$LVGS" + } | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - + echo "[SDS] Creating ReplicatedStorageClass '{{ .SDS_SC_NAME }}'..." + yq eval -n ' + .apiVersion = "storage.deckhouse.io/v1alpha1" | + .kind = "ReplicatedStorageClass" | + .metadata.name = "{{ .SDS_SC_NAME }}" | + .spec.storagePool = "data" | + .spec.reclaimPolicy = "Delete" | + .spec.topology = "Ignored" | + .spec.volumeAccess = "Local" + ' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - + - | + set -euo pipefail + if ! KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get storageclass "{{ .SDS_SC_NAME }}" >/dev/null 2>&1; then + echo "[ERR] StorageClass '{{ .SDS_SC_NAME }}' not found in nested cluster" >&2 + exit 1 + fi + - | + echo "[SDS] Setting {{ .SDS_SC_NAME }} as default StorageClass..." + DEFAULT_STORAGE_CLASS="{{ .SDS_SC_NAME }}" + KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl patch mc global --type='json' -p='[ + { + "op": "replace", + "path": "/spec/settings/defaultClusterStorageClass", + "value": "'$DEFAULT_STORAGE_CLASS'" + } + ]' + + nested:storage:ceph: + desc: Configure Ceph storage in nested cluster (enable modules, apply CephCluster, set default SC) + vars: + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + TARGET_SC: '{{ .TARGET_SC | default "ceph-pool-r2-csi-rbd-immediate" }}' + cmds: + - | + set -euo pipefail + export KUBECONFIG='{{ .NESTED_KUBECONFIG }}' + echo "[Deckhouse] Waiting for deploy/deckhouse to be Available..." + kubectl -n d8-system rollout status deploy/deckhouse --timeout=10m || true + echo "[Deckhouse] Waiting for service endpoints deckhouse..." + for i in $(seq 1 60); do + EPS=$(kubectl -n d8-system get ep deckhouse -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true) + [ -n "$EPS" ] && { echo "Endpoints: $EPS"; break; } || echo "waiting endpoints... $i/60"; sleep 5 + done + echo "[Deckhouse] Grace period for webhooks init..."; sleep 20 + echo "[Ceph] Applying ModuleConfig/MPO for operator-ceph/csi-ceph/snapshot-controller..." + for i in {1..12}; do + if kubectl --request-timeout=10s apply --validate=false -f manifests/storage/operator-ceph.yaml; then + break; else echo "[Ceph] operator-ceph.yaml apply failed, retry $i/12"; sleep 10; fi + done + # Rely on operator rollout + CRDs instead of module state polling + echo "[Ceph] Waiting for operator-ceph deployment..." + for i in $(seq 1 50); do + if kubectl -n d8-operator-ceph get deploy operator-ceph >/dev/null 2>&1; then + kubectl -n d8-operator-ceph rollout status deploy/operator-ceph --timeout=180s && break + fi + echo "[Ceph] operator-ceph not found yet ($i/50)"; sleep 10 + done + echo "[Ceph] Waiting for Ceph CRDs to appear..." + for crd in cephclusters.ceph.rook.io cephblockpools.ceph.rook.io cephfilesystems.ceph.rook.io; do + for i in $(seq 1 50); do + if kubectl get crd "$crd" >/dev/null 2>&1; then + echo "[Ceph] CRD $crd detected" + kubectl wait --for=condition=Established --timeout=300s crd/"$crd" || true + break + fi + echo "[Ceph] CRD $crd not found yet, retry $i/50"; sleep 5 + done + done + echo "[Ceph] Applying CephCluster CR..." 
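+          # manifests/storage/ceph.yaml carries the CephCluster and CephBlockPool CRs; the loop
+          # below retries the apply in case the operator webhooks are not fully ready yet.
+          # Cluster health can also be checked manually from the toolbox, e.g.:
+          #   kubectl -n d8-operator-ceph exec deploy/rook-ceph-tools -c ceph-tools -- ceph status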
+ for i in {1..12}; do + if kubectl --request-timeout=10s apply --validate=false -f manifests/storage/ceph.yaml; then + break; else echo "[Ceph] ceph.yaml apply failed, retry $i/12"; sleep 10; fi + done + echo "[Ceph] Waiting for CephCluster phase=Ready and HEALTH_OK..." + for i in $(seq 1 50); do + PHASE=$(kubectl -n d8-operator-ceph get cephcluster rook-ceph-cluster -o jsonpath='{.status.phase}' 2>/dev/null || true) + HEALTH=$(kubectl -n d8-operator-ceph get cephcluster rook-ceph-cluster -o jsonpath='{.status.ceph.health}' 2>/dev/null || true) + echo "[Ceph] Status: phase=${PHASE:-?} health=${HEALTH:-?}" + if [ "$PHASE" = "Ready" ] && [ "$HEALTH" = "HEALTH_OK" ]; then + break + fi + sleep 20 + done + echo "[Ceph] Gathering cluster connection details..." + FSID="" + for i in $(seq 1 30); do + FSID=$(kubectl -n d8-operator-ceph exec deploy/rook-ceph-tools -c ceph-tools -- ceph fsid 2>/dev/null | tr -d '\r\n' || true) + [ -n "$FSID" ] && break || true + echo "[Ceph] Waiting for FSID (attempt $i/30)..."; sleep 5 + done + if [ -z "$FSID" ]; then + echo "[ERR] Failed to get Ceph FSID" >&2 + exit 1 + fi + USER_KEY=$(kubectl -n d8-operator-ceph exec deploy/rook-ceph-tools -c ceph-tools -- ceph auth get-key client.admin 2>/dev/null | tr -d '\r\n' || true) + if [ -z "$USER_KEY" ]; then + echo "[ERR] Failed to get admin key" >&2 + exit 1 + fi + MON_IPS=$(kubectl -n d8-operator-ceph get svc -l ceph-mon -o jsonpath='{.items[*].spec.clusterIP}' 2>/dev/null || true) + if [ -z "$MON_IPS" ]; then + echo "[ERR] Failed to get monitor IPs" >&2 + exit 1 + fi + echo "[Ceph] Creating CephClusterConnection..." + MONITORS_YAML=$(echo "$MON_IPS" | tr ' ' '\n' | sed 's/^/ - /;s/$/:6789/') + { + echo "apiVersion: storage.deckhouse.io/v1alpha1" + echo "kind: CephClusterConnection" + echo "metadata:" + echo " name: ceph-cluster-1" + echo "spec:" + echo " clusterID: ${FSID}" + echo " monitors:" + echo "$MONITORS_YAML" + echo " userID: admin" + echo " userKey: ${USER_KEY}" + } | kubectl apply -f - + echo "[Ceph] Waiting for CephClusterConnection to be Created..." + for i in $(seq 1 30); do + PHASE=$(kubectl get cephclusterconnection ceph-cluster-1 -o jsonpath='{.status.phase}' 2>/dev/null || true) + [ "$PHASE" = "Created" ] && break || true + echo "[Ceph] CephClusterConnection phase=$PHASE, retry $i/30"; sleep 5 + done + echo "[Ceph] Applying CephBlockPool (idempotent)..." + yq 'select(.kind == "CephBlockPool")' manifests/storage/ceph.yaml | kubectl --request-timeout=15s apply --validate=false -f - + echo "[Ceph] Waiting for CephBlockPool to be Ready..." + for i in $(seq 1 30); do + BP_PHASE=$(kubectl -n d8-operator-ceph get cephblockpool pool-rbd-auto-test -o jsonpath='{.status.phase}' 2>/dev/null || true) + [ "$BP_PHASE" = "Ready" ] && break || true + echo "[Ceph] BlockPool phase=$BP_PHASE; retry $i/30"; sleep 10 + done + echo "[Ceph] Creating CephStorageClass '{{ .TARGET_SC }}'..." + yq eval -n ' + .apiVersion = "storage.deckhouse.io/v1alpha1" | + .kind = "CephStorageClass" | + .metadata.name = "{{ .TARGET_SC }}" | + .spec.clusterConnectionName = "ceph-cluster-1" | + .spec.reclaimPolicy = "Delete" | + .spec.type = "RBD" | + .spec.rbd.defaultFSType = "ext4" | + .spec.rbd.pool = "pool-rbd-auto-test" + ' | kubectl apply -f - + echo "[Ceph] Waiting for CephStorageClass to be Created..." 
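+          # Readiness is polled via .status.phase; the equivalent one-off check is:
+          #   kubectl get cephstorageclass '{{ .TARGET_SC }}' -o jsonpath='{.status.phase}'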
+ for i in $(seq 1 50); do + PHASE=$(kubectl get cephstorageclass '{{ .TARGET_SC }}' -o jsonpath='{.status.phase}' 2>/dev/null || true) + [ "$PHASE" = "Created" ] && { echo "[Ceph] CephStorageClass is Created"; break; } || true + echo "[Ceph] CephStorageClass phase=$PHASE, retry $i/50"; sleep 10 + done + echo "[Ceph] Waiting for StorageClass '{{ .TARGET_SC }}'..." + for i in $(seq 1 50); do + kubectl get sc '{{ .TARGET_SC }}' >/dev/null 2>&1 && { echo "SC ready"; break; } || echo "waiting SC '{{ .TARGET_SC }}'... $i/50"; sleep 10 + done + echo "[Ceph] Setting '{{ .TARGET_SC }}' as default StorageClass via mc global..." + kubectl patch mc global --type='json' -p='[ + {"op":"replace","path":"/spec/settings/defaultClusterStorageClass","value":"'{{ .TARGET_SC }}'"} + ]' + + # ------------------------------------------------------------ + # Run E2E + # ------------------------------------------------------------ + nested:e2e: + desc: Run virtualization E2E tests against nested cluster + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: "{{ .NAMESPACE }}" + NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' + NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' + E2E_DIR: '{{ .E2E_DIR | default (env "E2E_DIR") | default "../../tests/e2e" }}' + FOCUS: '{{ or .FOCUS "" }}' + SKIP: '{{ or .SKIP "" }}' + LABELS: '{{ or .LABELS "" }}' + TIMEOUT: '{{ or .TIMEOUT "4h" }}' + JUNIT_PATH: '{{ or .JUNIT_PATH "" }}' + TARGET_STORAGE_CLASS: '{{ if .STORAGE_CLASS }}{{ .STORAGE_CLASS }}{{ else if or (eq .STORAGE_PROFILE "ceph") (eq .STORAGE_PROFILE "ceph-rbd") (eq .STORAGE_PROFILE "cephrbd") }}ceph-pool-r2-csi-rbd{{ else }}linstor-thin-r2{{ end }}' + cmds: + - task: nested:kubeconfig + vars: + TMP_DIR: "{{ .TMP_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE }}" + NAMESPACE: "{{ .NAMESPACE }}" + NESTED_DIR: "{{ .NESTED_DIR }}" + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + - task: nested:ensure-sc + vars: + TMP_DIR: "{{ .TMP_DIR }}" + NAMESPACE: "{{ .NAMESPACE }}" + NESTED_DIR: "{{ .NESTED_DIR }}" + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + SC_NAME: "{{ .TARGET_STORAGE_CLASS }}" + - task: nested:ensure-vmclass-default + vars: + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + - | + set -euo pipefail + export KUBECONFIG="{{ .NESTED_KUBECONFIG }}" + cd {{ .E2E_DIR }} + task run TIMEOUT='{{ .TIMEOUT }}' {{ if .FOCUS }}FOCUS='{{ .FOCUS }}'{{ end }} {{ if .LABELS }}LABELS='{{ .LABELS }}'{{ end }} {{ if .JUNIT_PATH }}JUNIT_PATH='{{ .JUNIT_PATH }}'{{ end }} diff --git a/ci/dvp-e2e/charts/cluster-config/.helmignore b/ci/dvp-e2e/charts/cluster-config/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ci/dvp-e2e/charts/cluster-config/Chart.yaml b/ci/dvp-e2e/charts/cluster-config/Chart.yaml new file mode 100644 index 0000000000..344eb6ee44 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/Chart.yaml @@ -0,0 +1,18 @@ +apiVersion: v2 +name: cluster-config +description: Cluster configuration for E2E testing +type: application +version: 0.1.0 +appVersion: "1.0.0" +keywords: + - cluster + - configuration + - e2e + - testing +home: https://github.com/deckhouse/deckhouse +sources: + - https://github.com/deckhouse/deckhouse +maintainers: + - name: Deckhouse Team + email: team@deckhouse.io +dependencies: [] diff --git a/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml b/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml new file mode 100644 index 0000000000..bc9c836bfc --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml @@ -0,0 +1,48 @@ +# Cluster configuration for DVP-over-DVP E2E testing +apiVersion: deckhouse.io/v1 +kind: ClusterConfiguration +clusterType: Cloud +cloud: + provider: DVP + prefix: {{ .Values.clusterConfigurationPrefix | default "demo-cluster" }} +podSubnetCIDR: 10.112.0.0/16 +serviceSubnetCIDR: 10.223.0.0/16 +kubernetesVersion: "{{ .Values.deckhouse.kubernetesVersion }}" +clusterDomain: "internal.cluster.local" +--- +apiVersion: deckhouse.io/v1 +kind: InitConfiguration +deckhouse: + imagesRepo: dev-registry.deckhouse.io/sys/deckhouse-oss + registryDockerCfg: {{ .Values.deckhouse.registryDockerCfg | quote }} + devBranch: {{ .Values.deckhouse.tag }} +--- +apiVersion: deckhouse.io/v1 +kind: DVPClusterConfiguration +layout: Standard +sshPublicKey: {{ .Values.sshPublicKey }} +masterNodeGroup: + replicas: {{ .Values.instances.masterNodes.count }} + instanceClass: + virtualMachine: + bootloader: {{ .Values.image.bootloader }} + cpu: + cores: {{ .Values.instances.masterNodes.cores }} + coreFraction: {{ .Values.instances.masterNodes.coreFraction }} + memory: + size: {{ .Values.instances.masterNodes.memory }} + ipAddresses: + - Auto + virtualMachineClassName: "{{ .Values.namespace }}-cpu" + rootDisk: + size: 50Gi + storageClass: {{ .Values.storageClasses.controlPlane.root }} + image: + kind: VirtualImage + name: image + etcdDisk: + size: 15Gi + storageClass: {{ .Values.storageClasses.controlPlane.etcd }} +provider: + kubeconfigDataBase64: {{ .Values.kubeconfigDataBase64 }} + namespace: {{ .Values.namespace }} \ No newline at end of file diff --git a/ci/dvp-e2e/charts/cluster-config/templates/disabled-modules.yaml b/ci/dvp-e2e/charts/cluster-config/templates/disabled-modules.yaml new file mode 100644 index 0000000000..2887a2b168 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/disabled-modules.yaml @@ -0,0 +1,10 @@ +{{- $modules := list "upmeter" "local-path-provisioner" "pod-reloader" "secret-copier" "namespace-configurator" -}} +{{ range $modules }} +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: {{ . 
}} +spec: + enabled: false +{{ end }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml b/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml new file mode 100644 index 0000000000..dad2d77cd6 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml @@ -0,0 +1,27 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: e2e-runner + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: e2e-runner-admin +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin +subjects: +- kind: ServiceAccount + name: e2e-runner + namespace: kube-system + + + + + + + + diff --git a/ci/dvp-e2e/charts/cluster-config/templates/ingress.yaml b/ci/dvp-e2e/charts/cluster-config/templates/ingress.yaml new file mode 100644 index 0000000000..387a3c89bc --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/ingress.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: deckhouse.io/v1 +kind: IngressNginxController +metadata: + name: main +spec: + inlet: HostPort + enableIstioSidecar: false + ingressClass: nginx + hostPort: + httpPort: 80 + httpsPort: 443 + nodeSelector: + node-role.kubernetes.io/master: '' + tolerations: + - effect: NoSchedule + operator: Exists diff --git a/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml b/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml new file mode 100644 index 0000000000..91730513f6 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml @@ -0,0 +1,89 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: deckhouse +spec: + version: 1 + enabled: true + settings: + bundle: Default + logLevel: Info +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: global +spec: + version: 1 + settings: + defaultClusterStorageClass: ceph-pool-r2-csi-rbd-immediate + modules: + publicDomainTemplate: "%s.{{ .Values.namespace }}.{{ .Values.domain }}" +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: user-authn +spec: + version: 1 + enabled: true + settings: + controlPlaneConfigurator: + dexCAMode: DoNotNeed + publishAPI: + enabled: true + https: + mode: Global + global: + kubeconfigGeneratorMasterCA: "" +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: cni-cilium +spec: + version: 1 + enabled: true + settings: + tunnelMode: VXLAN +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: prompp +spec: + version: 1 + enabled: true +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: prompp +spec: + imageTag: stable + scanInterval: 15s +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: snapshot-controller +spec: + version: 1 + enabled: true +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: snapshot-controller +spec: + imageTag: main + scanInterval: 15s +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: sds-replicated-volume +spec: + enabled: true + version: 1 diff --git a/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml b/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml new file mode 100644 index 0000000000..85b627695d --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml @@ -0,0 +1,34 @@ +{{- if .Values.features.nfs.enabled }} +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: csi-nfs +spec: + source: deckhouse + enabled: true + version: 1 +--- +apiVersion: 
deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: csi-nfs +spec: + imageTag: main + scanInterval: 10m +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: NFSStorageClass +metadata: + name: nfs +spec: + connection: + host: "nfs-server.{{ .Values.namespace }}.svc.cluster.local" + share: / + nfsVersion: "4.2" + mountOptions: + mountMode: hard + timeout: 60 + retransmissions: 3 + reclaimPolicy: Delete + volumeBindingMode: Immediate +{{- end }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/ngc.yaml b/ci/dvp-e2e/charts/cluster-config/templates/ngc.yaml new file mode 100644 index 0000000000..3672dc8e79 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/ngc.yaml @@ -0,0 +1,37 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: NodeGroupConfiguration +metadata: + name: qemu-guest-agent-install-ubuntu.sh +spec: + weight: 98 + nodeGroups: ["*"] + bundles: ["ubuntu-lts", "debian"] + content: | + bb-apt-install qemu-guest-agent + systemctl enable --now qemu-guest-agent +--- +apiVersion: deckhouse.io/v1alpha1 +kind: NodeGroupConfiguration +metadata: + name: install-tools.sh +spec: + weight: 98 + nodeGroups: ["*"] + bundles: ["*"] + content: | + bb-sync-file /etc/profile.d/01-kubectl-aliases.sh - << "EOF" + source <(/opt/deckhouse/bin/kubectl completion bash) + alias k=kubectl + complete -o default -F __start_kubectl k + EOF + + if [ ! -f /usr/local/bin/k9s ]; then + K9S_URL=$(curl -s https://api.github.com/repos/derailed/k9s/releases/latest | jq '.assets[] | select(.name=="k9s_Linux_amd64.tar.gz") | .browser_download_url' -r) + curl -L "${K9S_URL}" | tar -xz -C /usr/local/bin/ "k9s" + fi + + if [ ! -f /usr/local/bin/stern ]; then + STERN_URL=$(curl -s https://api.github.com/repos/stern/stern/releases/latest | jq '.assets[].browser_download_url | select(. 
| test("linux_amd64"))' -r) + curl -L "${STERN_URL}" | tar -xz -C /usr/local/bin/ "stern" + fi diff --git a/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml b/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml new file mode 100644 index 0000000000..3779fb52a3 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml @@ -0,0 +1,40 @@ +{{ range .Values.instances.additionalNodes }} +--- +apiVersion: deckhouse.io/v1alpha1 +kind: DVPInstanceClass +metadata: + name: {{ .name }} +spec: + virtualMachine: + virtualMachineClassName: "{{ $.Values.namespace }}-cpu" + cpu: + cores: {{ .cores }} + coreFraction: {{ .coreFraction }} + memory: + size: {{ .memory }} + bootloader: {{ $.Values.image.bootloader }} + rootDisk: + size: 50Gi + storageClass: {{ $.Values.storageClasses.workers.root }} + image: + kind: VirtualImage + name: image +--- +apiVersion: deckhouse.io/v1 +kind: NodeGroup +metadata: + name: {{ .name }} +spec: +{{- if eq .name "system" }} + nodeTemplate: + labels: + node-role.deckhouse.io/system: "" +{{- end }} + nodeType: {{ .nodeType | default "CloudEphemeral" }} + cloudInstances: + minPerZone: {{ .count }} + maxPerZone: {{ .count }} + classReference: + kind: DVPInstanceClass + name: {{ .name }} +{{ end }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/rbac.yaml b/ci/dvp-e2e/charts/cluster-config/templates/rbac.yaml new file mode 100644 index 0000000000..6b8998a1e8 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/rbac.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: deckhouse.io/v1 +kind: ClusterAuthorizationRule +metadata: + name: admin +spec: + subjects: + - kind: User + name: admin@deckhouse.io + accessLevel: SuperAdmin + portForwarding: true +--- +apiVersion: deckhouse.io/v1 +kind: User +metadata: + name: admin +spec: + email: admin@deckhouse.io + # echo "t3chn0l0gi4" | htpasswd -BinC 10 "" | cut -d: -f2 | base64 -w0 + password: {{ .Values.passwordHash }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml b/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml new file mode 100644 index 0000000000..dffaf8b115 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml @@ -0,0 +1,28 @@ +--- +{{- if hasKey .Values "features" }} +{{- if .Values.features.virtualization }} +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: virtualization +spec: + enabled: true + version: 1 + settings: + dvcr: + storage: + persistentVolumeClaim: + size: 10Gi + type: PersistentVolumeClaim + virtualMachineCIDRs: + - 192.168.10.0/24 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: virtualization +spec: + imageTag: {{ .Values.virtualization.tag }} + scanInterval: 15s +{{- end }} +{{- end }} diff --git a/ci/dvp-e2e/charts/cluster-config/values.yaml b/ci/dvp-e2e/charts/cluster-config/values.yaml new file mode 100644 index 0000000000..8c158bf0f4 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/values.yaml @@ -0,0 +1,77 @@ +# Cluster configuration values for E2E testing + +# Instance configuration +instances: + masterNodes: + count: 1 + cores: 8 + coreFraction: 50% + memory: 20Gi + additionalNodes: + - name: worker + count: 3 + cores: 6 + coreFraction: 50% + memory: 12Gi + +# Deckhouse configuration +deckhouse: + tag: main + kubernetesVersion: Automatic + +# Virtualization configuration +virtualization: + tag: main + +# Image configuration +image: + url: 
https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/ubuntu/noble-server-cloudimg-amd64.img + defaultUser: ubuntu + bootloader: EFI + +# Ingress hosts +ingressHosts: + - api + - grafana + - dex + - prometheus + - console + - virtualization + +# Storage classes configuration (for parent cluster) +storageClasses: + controlPlane: + root: ceph-pool-r2-csi-rbd-immediate + etcd: ceph-pool-r2-csi-rbd-immediate + workers: + root: ceph-pool-r2-csi-rbd-immediate + +# Infrastructure components +infra: + nfs: + storageClass: nfs-4-1-wffc + dvcr: + storageClass: ceph-pool-r2-csi-rbd-immediate + +# Virtual disks configuration +virtualDisks: + os: + storageClass: ceph-pool-r2-csi-rbd-immediate + data: + storageClass: nfs-4-1-wffc + +# Security settings +security: + admissionPolicyEngine: + enabled: true + networkPolicies: + enabled: true + +# Feature flags +features: + virtualization: true + monitoring: true + logging: true + ingress: true + nfs: + enabled: false diff --git a/ci/dvp-e2e/charts/infra/.helmignore b/ci/dvp-e2e/charts/infra/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/ci/dvp-e2e/charts/infra/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ci/dvp-e2e/charts/infra/Chart.yaml b/ci/dvp-e2e/charts/infra/Chart.yaml new file mode 100644 index 0000000000..5eb2c3bfc0 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/Chart.yaml @@ -0,0 +1,17 @@ +apiVersion: v2 +name: infra +description: Infrastructure components for E2E testing +type: application +version: 0.1.0 +appVersion: "1.0.0" +keywords: + - infrastructure + - e2e + - testing +home: https://github.com/deckhouse/deckhouse +sources: + - https://github.com/deckhouse/deckhouse +maintainers: + - name: Deckhouse Team + email: team@deckhouse.io +dependencies: [] diff --git a/ci/dvp-e2e/charts/infra/templates/ingress.yaml b/ci/dvp-e2e/charts/infra/templates/ingress.yaml new file mode 100644 index 0000000000..b813234319 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/ingress.yaml @@ -0,0 +1,74 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: dvp-over-dvp-80 + namespace: {{ .Values.namespace }} +spec: + ports: + - port: 80 + targetPort: 80 + protocol: TCP + name: http + selector: + dvp.deckhouse.io/node-group: master +--- +apiVersion: v1 +kind: Service +metadata: + name: dvp-over-dvp-443 + namespace: {{ .Values.namespace }} +spec: + ports: + - port: 443 + targetPort: 443 + protocol: TCP + name: https + selector: + dvp.deckhouse.io/node-group: master +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wildcard-https + namespace: {{ .Values.namespace }} + annotations: + nginx.ingress.kubernetes.io/ssl-passthrough: "true" + nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" +spec: + ingressClassName: nginx + rules: + {{- range .Values.ingressHosts }} + - host: "{{ . 
}}.{{ $.Values.namespace }}.{{ $.Values.domain }}" + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: dvp-over-dvp-443 + port: + number: 443 + {{- end }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wildcard-http + namespace: {{ .Values.namespace }} + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "false" + nginx.ingress.kubernetes.io/rewrite-target: / +spec: + ingressClassName: nginx + rules: + - host: "*.{{ .Values.namespace }}.{{ .Values.domain }}" + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: dvp-over-dvp-80 + port: + number: 80 diff --git a/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml b/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml new file mode 100644 index 0000000000..b891407de1 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml @@ -0,0 +1,43 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: jump-host + namespace: {{ .Values.namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: jump-host + template: + metadata: + labels: + app: jump-host + spec: + containers: + - name: jump-host + image: registry-dvp.dev.flant.dev/tools/jump-host:v0.1.2 + imagePullPolicy: Always + resources: + limits: + cpu: "200m" + memory: "200Mi" + requests: + cpu: "200m" + memory: "200Mi" + ports: + # SSH service for jump-host; use non-default port 2222 to avoid collisions + - containerPort: 2222 + env: + - name: SSH_KEY + value: "{{ .Values.sshPublicKey }}" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + securityContext: + runAsNonRoot: true + runAsUser: 1000 + tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" \ No newline at end of file diff --git a/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml b/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml new file mode 100644 index 0000000000..73a02e541c --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: jump-host + namespace: {{ .Values.namespace }} +spec: + type: NodePort + selector: + app: jump-host + ports: + - protocol: TCP + # NodePort SSH entrypoint for jump-host; 2222 is chosen to avoid default SSH port 22 conflicts + port: 2222 + targetPort: 2222 diff --git a/ci/dvp-e2e/charts/infra/templates/ns.yaml b/ci/dvp-e2e/charts/infra/templates/ns.yaml new file mode 100644 index 0000000000..064087cab7 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/ns.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Values.namespace }} + labels: + heritage: deckhouse diff --git a/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml b/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml new file mode 100644 index 0000000000..1a6a4b9846 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml @@ -0,0 +1,41 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: dkp-sa + namespace: {{ .Values.namespace }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: dkp-sa-secret + namespace: {{ .Values.namespace }} + annotations: + kubernetes.io/service-account.name: dkp-sa +type: kubernetes.io/service-account-token +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: dkp-sa-rb + namespace: {{ .Values.namespace }} +subjects: + - kind: ServiceAccount + name: dkp-sa + namespace: {{ .Values.namespace }} +roleRef: + kind: ClusterRole + name: d8:use:role:manager + apiGroup: 
rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: dkp-sa-cluster-admin-{{ .Values.namespace }} +subjects: + - kind: ServiceAccount + name: dkp-sa + namespace: {{ .Values.namespace }} +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io diff --git a/ci/dvp-e2e/charts/infra/templates/vi.yaml b/ci/dvp-e2e/charts/infra/templates/vi.yaml new file mode 100644 index 0000000000..66034a649d --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/vi.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: virtualization.deckhouse.io/v1alpha2 +kind: VirtualImage +metadata: + name: image + namespace: {{ .Values.namespace }} +spec: + storage: ContainerRegistry + dataSource: + type: HTTP + http: + url: {{ .Values.image.url }} diff --git a/ci/dvp-e2e/charts/infra/templates/vmc.yaml b/ci/dvp-e2e/charts/infra/templates/vmc.yaml new file mode 100644 index 0000000000..39330ced39 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/vmc.yaml @@ -0,0 +1,7 @@ +apiVersion: virtualization.deckhouse.io/v1alpha2 +kind: VirtualMachineClass +metadata: + name: "{{ .Values.namespace }}-cpu" +spec: + cpu: + type: Discovery diff --git a/ci/dvp-e2e/charts/infra/values.yaml b/ci/dvp-e2e/charts/infra/values.yaml new file mode 100644 index 0000000000..9fe1b1ab6e --- /dev/null +++ b/ci/dvp-e2e/charts/infra/values.yaml @@ -0,0 +1,58 @@ +# Infrastructure values for E2E testing + +# Storage profiles (from original values.yaml) +storageProfiles: + default: + controlPlane: + root: linstor-thin-r2 + etcd: linstor-thin-r2 + workers: + root: linstor-thin-r2 + infra: + nfs: nfs-4-1-wffc + dvcr: linstor-thin-r2 + virtualDisks: + os: linstor-thin-r2 + data: nfs-4-1-wffc + cephrbd: + controlPlane: + root: ceph-pool-r2-csi-rbd + etcd: ceph-pool-r2-csi-rbd + workers: + root: ceph-pool-r2-csi-rbd + infra: + nfs: nfs-4-1-wffc + dvcr: ceph-pool-r2-csi-rbd + virtualDisks: + os: ceph-pool-r2-csi-rbd + data: ceph-pool-r2-csi-rbd + sds-local: + controlPlane: + root: sds-local-storage + etcd: sds-local-storage + workers: + root: sds-local-storage + infra: + nfs: nfs-4-1-wffc + dvcr: sds-local-storage + virtualDisks: + os: sds-local-storage + data: sds-local-storage + +# Network configuration +network: + domain: e2e.virtlab.flant.com + clusterConfigurationPrefix: x + internalNetworkCIDRs: + - "10.241.0.0/16" + +# Registry configuration +registry: + dockerCfg: "" + insecure: false + +# Monitoring configuration +monitoring: + enabled: true + retention: "7d" + scrapeInterval: "30s" diff --git a/ci/dvp-e2e/charts/support/Chart.yaml b/ci/dvp-e2e/charts/support/Chart.yaml new file mode 100644 index 0000000000..8eefb78886 --- /dev/null +++ b/ci/dvp-e2e/charts/support/Chart.yaml @@ -0,0 +1,18 @@ +apiVersion: v2 +name: support +description: Support components for E2E testing +type: application +version: 0.1.0 +appVersion: "1.0.0" +keywords: + - support + - utilities + - e2e + - testing +home: https://github.com/deckhouse/deckhouse +sources: + - https://github.com/deckhouse/deckhouse +maintainers: + - name: Deckhouse Team + email: team@deckhouse.io +dependencies: [] diff --git a/ci/dvp-e2e/charts/support/values.yaml b/ci/dvp-e2e/charts/support/values.yaml new file mode 100644 index 0000000000..8d3f37bc5b --- /dev/null +++ b/ci/dvp-e2e/charts/support/values.yaml @@ -0,0 +1,62 @@ +# Support components values for E2E testing + +# Namespace configuration +namespace: nightly-e2e + +# Loop integration +loop: + webhook: "" + channel: "test-virtualization-loop-alerts" + enabled: 
true + +# Logging configuration +logging: + level: "info" + format: "json" + retention: "7d" + +# Notification settings +notifications: + slack: + enabled: false + webhook: "" + channel: "" + email: + enabled: false + smtp: + host: "" + port: 587 + username: "" + password: "" + +# Backup configuration +backup: + enabled: true + schedule: "0 2 * * *" + retention: "7d" + storage: + type: "local" + path: "/backups" + +# Health checks +healthChecks: + enabled: true + interval: "30s" + timeout: "10s" + retries: 3 + +# Resource monitoring +monitoring: + enabled: true + metrics: + enabled: true + port: 8080 + alerts: + enabled: true + rules: [] + +# Debug settings +debug: + enabled: false + verbose: false + trace: false diff --git a/ci/dvp-e2e/manifests/storage/ceph.yaml b/ci/dvp-e2e/manifests/storage/ceph.yaml new file mode 100644 index 0000000000..717749221c --- /dev/null +++ b/ci/dvp-e2e/manifests/storage/ceph.yaml @@ -0,0 +1,48 @@ +--- +apiVersion: ceph.rook.io/v1 +kind: CephCluster +metadata: + name: rook-ceph-cluster + namespace: d8-operator-ceph +spec: + cephVersion: + image: quay.io/ceph/ceph:v18.2.2 + allowUnsupported: false + dataDirHostPath: /var/lib/rook + mon: + count: 3 + allowMultiplePerNode: false + mgr: + count: 2 + allowMultiplePerNode: false + dashboard: + enabled: true + ssl: true + storage: + useAllNodes: true + useAllDevices: true + # Match typical additional block devices presented to worker VMs. + # Include SCSI/virtio/xen virtio variations, avoid root/etcd disks like sda/sdb/sdc. + deviceFilter: "^(sd[d-z]|vd[b-z]|xvd[b-z])$" + onlyApplyOSDPlacement: false + healthCheck: + daemonHealth: + mon: + disabled: false + interval: 45s + osd: + disabled: false + interval: 60s + status: + disabled: false + interval: 60s +--- +apiVersion: ceph.rook.io/v1 +kind: CephBlockPool +metadata: + name: pool-rbd-auto-test + namespace: d8-operator-ceph +spec: + failureDomain: host + replicated: + size: 3 \ No newline at end of file diff --git a/ci/dvp-e2e/manifests/storage/operator-ceph.yaml b/ci/dvp-e2e/manifests/storage/operator-ceph.yaml new file mode 100644 index 0000000000..2b468b77df --- /dev/null +++ b/ci/dvp-e2e/manifests/storage/operator-ceph.yaml @@ -0,0 +1,29 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: operator-ceph +spec: + enabled: true + source: deckhouse-prod + version: 1 + settings: + csiDriver: + enable: true +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: csi-ceph +spec: + enabled: true + source: deckhouse-prod + version: 1 +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: snapshot-controller +spec: + enabled: true + source: deckhouse-prod + version: 1 diff --git a/ci/dvp-e2e/manifests/storage/sds-modules.yaml b/ci/dvp-e2e/manifests/storage/sds-modules.yaml new file mode 100644 index 0000000000..42030bda40 --- /dev/null +++ b/ci/dvp-e2e/manifests/storage/sds-modules.yaml @@ -0,0 +1,48 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: sds-node-configurator +spec: + enabled: true + version: 1 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: sds-node-configurator +spec: + imageTag: main + scanInterval: 15s +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: sds-local-volume +spec: + enabled: true + version: 1 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: sds-local-volume +spec: + imageTag: main + scanInterval: 15s +--- +apiVersion: deckhouse.io/v1alpha1 
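+# sds-replicated-volume provides the ReplicatedStorageClass implementation (e.g. linstor-thin-r2)
+# used by the "sds" storage profile.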
+kind: ModuleConfig +metadata: + name: sds-replicated-volume +spec: + enabled: true + version: 1 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: sds-replicated-volume +spec: + imageTag: main + scanInterval: 15s diff --git a/ci/dvp-e2e/manifests/storage/sds.yaml b/ci/dvp-e2e/manifests/storage/sds.yaml new file mode 100644 index 0000000000..0b8e27da48 --- /dev/null +++ b/ci/dvp-e2e/manifests/storage/sds.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: LVMVolumeGroup +metadata: + name: data +spec: + # Local VG; explicit local section is required for type=Local + type: Local + local: + actualVGNameOnTheNode: data + blockDeviceSelector: + devicePaths: + - /dev/sdd +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: ReplicatedStoragePool +metadata: + name: data +spec: + # Pool type must be LVM or LVMThin + type: LVM + lvmVolumeGroups: + - name: data +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: ReplicatedStorageClass +metadata: + name: linstor-thin-r2 +spec: + storagePool: data + reclaimPolicy: Delete + topology: Ignored + volumeAccess: Local diff --git a/ci/dvp-e2e/profiles.json b/ci/dvp-e2e/profiles.json new file mode 100644 index 0000000000..bc1737043a --- /dev/null +++ b/ci/dvp-e2e/profiles.json @@ -0,0 +1,18 @@ +[ + { + "name": "sds", + "storage_class": "linstor-thin-r2", + "image_storage_class": "linstor-thin-r1-immediate", + "snapshot_storage_class": "linstor-thin-r2", + "worker_data_disk_size": "10Gi", + "description": "SDS storage with LINSTOR thin provisioning" + }, + { + "name": "cephrbd", + "storage_class": "ceph-pool-r2-csi-rbd-immediate", + "image_storage_class": "ceph-pool-r2-csi-rbd-immediate", + "snapshot_storage_class": "ceph-pool-r2-csi-rbd-immediate", + "worker_data_disk_size": "10Gi", + "description": "Ceph RBD storage with immediate provisioning" + } +] diff --git a/ci/dvp-e2e/scripts/get_profile_config.sh b/ci/dvp-e2e/scripts/get_profile_config.sh new file mode 100755 index 0000000000..a89a448eb7 --- /dev/null +++ b/ci/dvp-e2e/scripts/get_profile_config.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Script to get storage class configuration from profiles.json +# Usage: get_profile_config.sh + +set -euo pipefail + +PROFILE="${1:-}" +PROFILES_FILE="${2:-./profiles.json}" + +if [[ -z "$PROFILE" ]]; then + echo "Usage: $0 [profiles_file]" >&2 + exit 1 +fi + +if [[ ! 
-f "$PROFILES_FILE" ]]; then + echo "Profiles file not found: $PROFILES_FILE" >&2 + exit 1 +fi + +# Use jq to find profile by exact name only +PROFILE_CONFIG=$(jq -r --arg profile "$PROFILE" ' + .[] | select(.name == $profile) | + "\(.storage_class)|\(.image_storage_class)|\(.snapshot_storage_class)|\(.worker_data_disk_size // "10Gi")" +' "$PROFILES_FILE") + +if [[ -z "$PROFILE_CONFIG" || "$PROFILE_CONFIG" == "null" ]]; then + echo "Profile '$PROFILE' not found in $PROFILES_FILE" >&2 + echo "Available profiles:" >&2 + jq -r '.[] | " - \(.name)"' "$PROFILES_FILE" >&2 + exit 1 +fi + +# Split the result and export variables +IFS='|' read -r SC IMG_SC SNAP_SC ATTACH_SIZE <<< "$PROFILE_CONFIG" + +echo "STORAGE_CLASS=$SC" +echo "IMAGE_STORAGE_CLASS=$IMG_SC" +echo "SNAPSHOT_STORAGE_CLASS=$SNAP_SC" +echo "ATTACH_DISK_SIZE=$ATTACH_SIZE" diff --git a/ci/dvp-e2e/scripts/loop_junit_notify.py b/ci/dvp-e2e/scripts/loop_junit_notify.py new file mode 100755 index 0000000000..2ead80cc6e --- /dev/null +++ b/ci/dvp-e2e/scripts/loop_junit_notify.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +# Copyright 2025 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parse JUnit XML and send test results to Loop webhook.""" + +import argparse +import json +import os +import sys +import urllib.error +import urllib.request +import xml.etree.ElementTree as ET +from datetime import datetime +from pathlib import Path + + +def load_env_file(env_path: Path) -> None: + """Load environment variables from .env file.""" + if not env_path.exists(): + return + + with open(env_path, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, value = line.split('=', 1) + # Don't override existing env vars + if key not in os.environ: + os.environ[key] = value.strip('"').strip("'") + + +def parse_junit_xml(junit_file: Path) -> dict: + """Parse JUnit XML file and extract test results.""" + try: + tree = ET.parse(junit_file) + root = tree.getroot() + + # Handle both testsuites and testsuite root elements + if root.tag == 'testsuites': + testsuites = root + else: + testsuites = root + + total_tests = int(testsuites.get('tests', 0)) + total_failures = int(testsuites.get('failures', 0)) + total_errors = int(testsuites.get('errors', 0)) + total_skipped = int(testsuites.get('skipped', 0)) + total_time = float(testsuites.get('time', 0)) + + # Calculate success rate + successful_tests = total_tests - total_failures - total_errors + success_rate = (successful_tests / total_tests * 100) if total_tests > 0 else 0 + + # Extract failed test details + failed_tests = [] + for testsuite in testsuites.findall('testsuite'): + for testcase in testsuite.findall('testcase'): + failure = testcase.find('failure') + error = testcase.find('error') + if failure is not None or error is not None: + failed_tests.append({ + 'name': testcase.get('name', 'unknown'), + 'class': testcase.get('classname', 'unknown'), + 'time': float(testcase.get('time', 0)), + 'message': (failure.get('message', '') if failure is 
not None else '') or + (error.get('message', '') if error is not None else '') + }) + + return { + 'total_tests': total_tests, + 'successful_tests': successful_tests, + 'failed_tests': total_failures + total_errors, + 'skipped_tests': total_skipped, + 'success_rate': success_rate, + 'total_time': total_time, + 'failed_test_details': failed_tests[:5], # Limit to first 5 failures + 'has_more_failures': len(failed_tests) > 5 + } + except ET.ParseError as e: + print(f"[ERR] Failed to parse JUnit XML: {e}", file=sys.stderr) + return None + except Exception as e: + print(f"[ERR] Error processing JUnit file: {e}", file=sys.stderr) + return None + + +def format_test_results(results: dict, run_id: str, storage_profile: str, timeout: str) -> str: + """Format test results into a readable message.""" + if results is None: + return f"โŒ Failed to parse test results for {run_id}" + + # Determine status emoji and color + if results['failed_tests'] == 0: + status_emoji = "โœ…" + status_text = "SUCCESS" + elif results['success_rate'] >= 80: + status_emoji = "โš ๏ธ" + status_text = "PARTIALLY SUCCESS" + else: + status_emoji = "โŒ" + status_text = "FAILED" + + # Format time + time_str = f"{results['total_time']:.1f}s" + if results['total_time'] > 60: + minutes = int(results['total_time'] // 60) + seconds = int(results['total_time'] % 60) + time_str = f"{minutes}m {seconds}s" + + # Build message + message_lines = [ + f"{status_emoji} E2E tests for virtualization completed", + f"๐Ÿ“‹ Run ID: {run_id}", + f"๐Ÿ’พ Storage: {storage_profile}", + f"โฑ๏ธ Timeout: {timeout}", + f"๐Ÿ• Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", + "", + f"๐Ÿ“Š Results: {status_text}", + f"โ€ข Total tests: {results['total_tests']}", + f"โ€ข Passed: {results['successful_tests']}", + f"โ€ข Failed: {results['failed_tests']}", + f"โ€ข Skipped: {results['skipped_tests']}", + f"โ€ข Success rate: {results['success_rate']:.1f}%", + f"โ€ข Duration: {time_str}" + ] + + # Add failed test details if any + if results['failed_test_details']: + message_lines.extend([ + "", + "๐Ÿ” Failed tests:" + ]) + for test in results['failed_test_details']: + message_lines.append(f"โ€ข {test['class']}.{test['name']}") + if test['message']: + # Truncate long messages + msg = test['message'][:100] + "..." if len(test['message']) > 100 else test['message'] + message_lines.append(f" {msg}") + + if results['has_more_failures']: + message_lines.append(f"โ€ข ... 
+        # Report how many failures were omitted from the truncated detail list
+        message_lines.append(f"• ... and {results['failed_tests'] - len(results['failed_test_details'])} more tests")
+
+    return "\n".join(message_lines)
+
+
+def send_to_loop(webhook_url: str, channel: str, message: str) -> bool:
+    """Send message to Loop webhook."""
+    try:
+        payload = json.dumps({"channel": channel, "text": message}).encode("utf-8")
+        request = urllib.request.Request(
+            webhook_url,
+            data=payload,
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+
+        with urllib.request.urlopen(request, timeout=30) as response:
+            response.read()
+        return True
+    except urllib.error.HTTPError as e:
+        print(f"[ERR] HTTP error {e.code}: {e.reason}", file=sys.stderr)
+        return False
+    except urllib.error.URLError as e:
+        print(f"[ERR] URL error: {e.reason}", file=sys.stderr)
+        return False
+    except Exception as e:
+        print(f"[ERR] Unexpected error: {e}", file=sys.stderr)
+        return False
+
+
+def main(argv: list[str]) -> int:
+    # Load .env file if it exists
+    env_path = Path(__file__).parent.parent / '.env'
+    load_env_file(env_path)
+
+    parser = argparse.ArgumentParser(description="Parse JUnit XML and send results to Loop")
+    parser.add_argument("--junit-file", required=True, help="Path to JUnit XML file")
+    parser.add_argument("--run-id", required=True, help="Test run ID")
+    parser.add_argument("--storage-profile", required=True, help="Storage profile used")
+    parser.add_argument("--webhook-url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK'))
+    parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts'))
+    parser.add_argument("--timeout", default="30m", help="Test timeout")
+
+    args = parser.parse_args(argv)
+
+    if not args.webhook_url:
+        print("[ERR] LOOP_WEBHOOK not set. Set via --webhook-url or LOOP_WEBHOOK env variable", file=sys.stderr)
+        return 1
+
+    junit_file = Path(args.junit_file)
+    if not junit_file.exists():
+        print(f"[ERR] JUnit file not found: {junit_file}", file=sys.stderr)
+        return 1
+
+    # Parse JUnit results
+    results = parse_junit_xml(junit_file)
+
+    # Format message
+    message = format_test_results(results, args.run_id, args.storage_profile, args.timeout)
+
+    # Send to Loop
+    if send_to_loop(args.webhook_url, args.channel, message):
+        print(f"[OK] Results sent to Loop channel '{args.channel}'")
+        return 0
+    else:
+        print("[ERR] Failed to send results to Loop", file=sys.stderr)
+        return 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
diff --git a/ci/dvp-e2e/scripts/loop_matrix_summary.py b/ci/dvp-e2e/scripts/loop_matrix_summary.py
new file mode 100755
index 0000000000..cb248e140f
--- /dev/null
+++ b/ci/dvp-e2e/scripts/loop_matrix_summary.py
@@ -0,0 +1,391 @@
+#!/usr/bin/env python3
+# Copyright 2025 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
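+#
+# Illustrative invocation (paths, prefix, and run URL below are examples, not
+# values wired into the workflow; the real flags are defined in main() further
+# down):
+#
+#   ./scripts/loop_matrix_summary.py \
+#     --profiles "sds,cephrbd" \
+#     --run-id-prefix nightly \
+#     --log-dir tmp/logs \
+#     --github-run-url "https://github.com/<org>/<repo>/actions/runs/<id>"
+#
+# LOOP_WEBHOOK and LOOP_CHANNEL can be passed as flags, environment variables,
+# or via a local .env file next to the scripts directory.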
+"""Parse matrix test logs and send summary to Loop webhook.""" + +import argparse +import json +import os +import re +import sys +import urllib.error +import urllib.request +from datetime import datetime +from pathlib import Path + + +def load_env_file(env_path: Path) -> None: + """Load environment variables from .env file.""" + if not env_path.exists(): + return + + with open(env_path, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, value = line.split('=', 1) + # Don't override existing env vars + if key not in os.environ: + os.environ[key] = value.strip('"').strip("'") + + +def parse_test_log(log_file: Path) -> dict: + """Parse test log file and extract results.""" + try: + content = log_file.read_text(encoding='utf-8') + + # Extract run ID from filename or content + run_id = log_file.stem + + # Look for test completion patterns + success_patterns = [ + r'\[OK\] run_id=([^\s]+) finished', + r'Ginkgo ran \d+ spec in [\d.]+s', + r'Test Suite Passed' + ] + + failure_patterns = [ + r'\[ERR\] run_id=([^\s]+) failed', + r'Ginkgo ran \d+ spec in [\d.]+s.*FAILED', + r'Test Suite Failed', + r'API response status: Failure', + r'admission webhook .* too long', + r'Unable to connect to the server', + r'Error while process exit code: exit status 1', + r'task: Failed to run task .* exit status', + r'Infrastructure runner \"master-node\" process exited' + ] + + # Check for explicit status markers first + start_match = re.search(r'\[START\].*run_id=([^\s]+)', content) + finish_match = re.search(r'\[FINISH\].*run_id=([^\s]+).*status=(\w+)', content) + + if start_match and finish_match: + # Use explicit status markers + success = finish_match.group(2) == 'ok' + failure = finish_match.group(2) == 'error' + else: + # Fallback to pattern matching + success = any(re.search(pattern, content, re.IGNORECASE) for pattern in success_patterns) + failure = any(re.search(pattern, content, re.IGNORECASE) for pattern in failure_patterns) + + # Extract storage profile from run_id + storage_profile = "unknown" + if '-' in run_id: + parts = run_id.split('-') + if len(parts) >= 2: + # Format: {prefix}-{profile}-{timestamp}-{random} + # For "test-sds-20251009-221516-17193", we want "sds" + storage_profile = parts[1] + + # Extract test statistics + test_stats = {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0} + + # Look for Ginkgo test results + ginkgo_match = re.search(r'Ran (\d+) of (\d+) Specs.*?(\d+) Passed.*?(\d+) Failed.*?(\d+) Skipped', content, re.DOTALL) + if ginkgo_match: + test_stats['total'] = int(ginkgo_match.group(1)) + test_stats['passed'] = int(ginkgo_match.group(3)) + test_stats['failed'] = int(ginkgo_match.group(4)) + test_stats['skipped'] = int(ginkgo_match.group(5)) + + # Extract timing information + duration = "unknown" + # Prefer explicit START/FINISH ISO markers + start_match = re.search(r'^\[START\].*time=([^\s]+)', content, re.MULTILINE) + finish_match = re.search(r'^\[FINISH\].*time=([^\s]+)', content, re.MULTILINE) + if start_match and finish_match: + try: + started = datetime.fromisoformat(start_match.group(1)) + finished = datetime.fromisoformat(finish_match.group(1)) + delta = finished - started + total_seconds = int(delta.total_seconds()) + hours = total_seconds // 3600 + minutes = (total_seconds % 3600) // 60 + seconds = total_seconds % 60 + duration = f"{hours}h {minutes}m {seconds}s" + except Exception: + pass + else: + # Fallback: try to find H:M:S pattern + time_match = re.search(r'(\d+):(\d+):(\d+)', content) + if 
time_match:
+                hours, minutes, seconds = time_match.groups()
+                duration = f"{hours}h {minutes}m {seconds}s"
+
+        # Extract error details - only from E2E test execution
+        error_details = []
+        if failure:
+            # Look for E2E test errors after "Running Suite" or "go run ginkgo"
+            e2e_start_patterns = [
+                r'Running Suite:',
+                r'go run.*ginkgo',
+                r'Will run.*specs'
+            ]
+
+            # Find E2E test section
+            e2e_start_pos = -1
+            for pattern in e2e_start_patterns:
+                match = re.search(pattern, content, re.IGNORECASE)
+                if match:
+                    e2e_start_pos = match.start()
+                    break
+
+            if e2e_start_pos >= 0:
+                # Extract content after E2E tests started
+                e2e_content = content[e2e_start_pos:]
+
+                # Look for actual test failures with cleaner patterns
+                test_error_patterns = [
+                    r'\[FAIL\].*?([^\n]+)',
+                    r'FAIL!.*?--.*?(\d+) Passed.*?(\d+) Failed',
+                    r'Test Suite Failed',
+                    r'Ginkgo ran.*FAILED',
+                    r'Error occurred during reconciliation.*?([^\n]+)',
+                    r'Failed to update resource.*?([^\n]+)',
+                    r'admission webhook.*denied the request.*?([^\n]+)',
+                    r'context deadline exceeded',
+                    r'timed out waiting for the condition.*?([^\n]+)',
+                    r'panic.*?([^\n]+)'
+                ]
+
+                for pattern in test_error_patterns:
+                    matches = re.findall(pattern, e2e_content, re.IGNORECASE | re.DOTALL)
+                    for match in matches:
+                        if isinstance(match, tuple):
+                            # Clean up the error message
+                            error_msg = f"{match[0]}: {match[1]}"
+                        else:
+                            error_msg = match
+
+                        # Clean up ANSI escape codes and extra whitespace
+                        error_msg = re.sub(r'\x1b\[[0-9;]*[mK]', '', error_msg)
+                        error_msg = re.sub(r'\[0m\s*\[38;5;9m\s*\[1m', '', error_msg)
+                        error_msg = re.sub(r'\[0m', '', error_msg)
+                        error_msg = error_msg.strip()
+
+                        # Skip empty, very short messages, or artifacts
+                        if len(error_msg) > 10 and not re.match(r'^\d+:\s*\d+$', error_msg):
+                            error_details.append(error_msg)
+
+            # Remove duplicates and limit to most meaningful errors
+            error_details = list(dict.fromkeys(error_details))[:2]
+
+        return {
+            'run_id': run_id,
+            'storage_profile': storage_profile,
+            'success': success and not failure,
+            'failure': failure,
+            'duration': duration,
+            'test_stats': test_stats,
+            'error_details': error_details,
+            'log_file': str(log_file)
+        }
+    except Exception as e:
+        print(f"[WARN] Failed to parse log {log_file}: {e}", file=sys.stderr)
+        return {
+            'run_id': log_file.stem,
+            'storage_profile': 'unknown',
+            'success': False,
+            'failure': True,
+            'duration': 'unknown',
+            'test_stats': {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0},
+            'error_details': [f"Failed to parse log: {e}"],
+            'log_file': str(log_file)
+        }
+
+
+def format_matrix_summary(results: list, run_id_prefix: str, profiles: str, github_run_url: str = None) -> str:
+    """Format matrix test results into a readable message."""
+    total_runs = len(results)
+    successful_runs = sum(1 for r in results if r['success'])
+    # Treat any non-success as failure for overall counters
+    failed_runs = total_runs - successful_runs
+
+    # Calculate total test statistics
+    total_tests = sum(r['test_stats']['total'] for r in results)
+    total_passed = sum(r['test_stats']['passed'] for r in results)
+    total_failed = sum(r['test_stats']['failed'] for r in results)
+    total_skipped = sum(r['test_stats']['skipped'] for r in results)
+
+    # Determine overall status
+    if total_runs == 0:
+        status_emoji = "⚪"
+        status_text = "NO RUNS"
+    elif failed_runs > 0:
+        status_emoji = "❌"
+        status_text = "FAILED"
+    else:
+        # No failures. Consider the run passed (skipped specs are allowed)
+        status_emoji = "✅"
+        status_text = "PASSED"
+
+    # Group results by storage profile
+    profile_results = {}
+    for result in results:
+        profile = result['storage_profile']
+        if profile not in profile_results:
+            profile_results[profile] = {
+                'success': 0,
+                'failure': 0,
+                'test_stats': {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0}
+            }
+        if result['success']:
+            profile_results[profile]['success'] += 1
+        else:
+            profile_results[profile]['failure'] += 1
+
+        # Aggregate test stats
+        for key in profile_results[profile]['test_stats']:
+            profile_results[profile]['test_stats'][key] += result['test_stats'][key]
+
+    # Build message with table format
+    current_date = datetime.now().strftime('%Y-%m-%d')
+    test_type = "Nightly" if run_id_prefix in ["n", "nightly"] else run_id_prefix.upper()
+
+    message_lines = [
+        f"# :dvp: DVP-virtualization {current_date} {test_type} e2e Tests"
+    ]
+
+    # Add table format for profile results
+    if profile_results:
+        message_lines.extend([
+            "",
+            "| Storage Profile | Status | Passed | Failed | Skipped | Success Rate | Duration |",
+            "|----------------|--------|--------|--------|---------|--------------|----------|"
+        ])
+
+        for profile, stats in profile_results.items():
+            total_configs = stats['success'] + stats['failure']
+            config_success_rate = (stats['success'] / total_configs * 100) if total_configs > 0 else 0
+
+            test_stats = stats['test_stats']
+            test_success_rate = (test_stats['passed'] / test_stats['total'] * 100) if test_stats['total'] > 0 else 0
+
+            status_emoji = "✅" if stats['failure'] == 0 else "❌" if stats['success'] == 0 else "⚠️"
+            status_text = "PASSED" if stats['failure'] == 0 else "FAILED" if stats['success'] == 0 else "PARTIAL"
+
+            # Get duration and build linked profile name
+            profile_duration = "unknown"
+            for result in results:
+                if result['storage_profile'] == profile:
+                    profile_duration = result['duration']
+                    break
+            name_md = f"[{profile.upper()}]({github_run_url})" if github_run_url else profile.upper()
+
+            message_lines.append(
+                f"| {name_md} | {status_emoji} **{status_text}** | {test_stats['passed']} | {test_stats['failed']} | {test_stats['skipped']} | {test_success_rate:.1f}% | {profile_duration} |"
+            )
+
+    return "\n".join(message_lines)
+
+
+def send_to_loop(webhook_url: str, channel: str, message: str) -> bool:
+    """Send message to Loop webhook."""
+    try:
+        payload = json.dumps({"channel": channel, "text": message}).encode("utf-8")
+        request = urllib.request.Request(
+            webhook_url,
+            data=payload,
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+
+        with urllib.request.urlopen(request, timeout=30) as response:
+            response.read()
+        return True
+    except urllib.error.HTTPError as e:
+        print(f"[ERR] HTTP error {e.code}: {e.reason}", file=sys.stderr)
+        return False
+    except urllib.error.URLError as e:
+        print(f"[ERR] URL error: {e.reason}", file=sys.stderr)
+        return False
+    except Exception as e:
+        print(f"[ERR] Unexpected error: {e}", file=sys.stderr)
+        return False
+
+
+def main(argv: list[str]) -> int:
+    # Load .env file if it exists
+    env_path = Path(__file__).parent.parent / '.env'
+    load_env_file(env_path)
+
+    parser = argparse.ArgumentParser(description="Parse matrix test logs and send summary to Loop")
+    parser.add_argument("--profiles", required=True, help="Comma-separated list of storage profiles")
+    parser.add_argument("--run-id-prefix", required=True, help="Run ID prefix")
+    parser.add_argument("--log-dir", required=True, help="Directory 
containing log files") + parser.add_argument("--webhook-url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK')) + parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts')) + parser.add_argument("--github-run-url", required=False, help="GitHub Actions run URL to link from profile name") + + args = parser.parse_args(argv) + + if not args.webhook_url: + print("[ERR] LOOP_WEBHOOK not set. Set via --webhook-url or LOOP_WEBHOOK env variable", file=sys.stderr) + return 1 + + log_dir = Path(args.log_dir) + if not log_dir.exists(): + print(f"[ERR] Log directory not found: {log_dir}", file=sys.stderr) + return 1 + + # Find all log files + log_files = list(log_dir.glob("*.log")) + if not log_files: + print(f"[WARN] No log files found in {log_dir}", file=sys.stderr) + return 0 + + # Parse all log files + results = [] + for log_file in log_files: + result = parse_test_log(log_file) + results.append(result) + + # Filter by run_id_prefix and profile (no aliases; use canonical names) + allowed_profiles = set([p.strip() for p in args.profiles.split(",")]) + filtered_results = [] + + for result in results: + # Filter by run_id prefix (more flexible matching) + if not result['run_id'].startswith(args.run_id_prefix): + continue + + # Filter by canonical profile name from run_id + normalized_profile = result['storage_profile'] + if normalized_profile not in allowed_profiles: + continue + + result['storage_profile'] = normalized_profile + filtered_results.append(result) + + results = filtered_results + + if not results: + print(f"[WARN] No results to report", file=sys.stderr) + return 0 + + # Format message + message = format_matrix_summary(results, args.run_id_prefix, args.profiles, github_run_url=args.github_run_url) + + # Send to Loop + if send_to_loop(args.webhook_url, args.channel, message): + print(f"[OK] Matrix summary sent to Loop channel '{args.channel}'") + return 0 + else: + print(f"[ERR] Failed to send matrix summary to Loop", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/ci/dvp-e2e/scripts/loop_notify.py b/ci/dvp-e2e/scripts/loop_notify.py new file mode 100644 index 0000000000..eac831ce78 --- /dev/null +++ b/ci/dvp-e2e/scripts/loop_notify.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# Copyright 2025 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
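+#
+# Illustrative invocation (the webhook URL and message text are placeholders,
+# not values shipped with this repo; the channel default matches the one set
+# in main() below):
+#
+#   LOOP_WEBHOOK="https://loop.example.com/hooks/<token>" \
+#   ./scripts/loop_notify.py \
+#     --channel test-virtualization-loop-alerts \
+#     --text "[OK] e2e run finished"
+#
+# --url overrides LOOP_WEBHOOK; both it and LOOP_CHANNEL may also be provided
+# via a local .env file.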
+"""Send notifications to Loop webhook.""" + +import argparse +import json +import os +import sys +import urllib.error +import urllib.request +from pathlib import Path + + +def load_env_file(env_path: Path) -> None: + """Load environment variables from .env file.""" + if not env_path.exists(): + return + + with open(env_path, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, value = line.split('=', 1) + # Don't override existing env vars + if key not in os.environ: + os.environ[key] = value.strip('"').strip("'") + + +def send_post_request(url: str, channel: str, text: str) -> None: + """Send JSON payload to Loop webhook.""" + + payload = json.dumps({"channel": channel, "text": text}).encode("utf-8") + request = urllib.request.Request( + url, + data=payload, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + with urllib.request.urlopen(request, timeout=30) as response: # noqa: S310 + # We just ensure the request succeeded; the body is usually empty. + response.read() + + +def main(argv: list[str]) -> int: + # Load .env file if it exists + env_path = Path(__file__).parent.parent / '.env' + load_env_file(env_path) + + parser = argparse.ArgumentParser(description="Send message to Loop webhook") + parser.add_argument("--url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK')) + parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts')) + parser.add_argument("--text", required=True, help="Message text") + + args = parser.parse_args(argv) + + if not args.url: + print("[ERR] LOOP_WEBHOOK not set. Set via --url or LOOP_WEBHOOK env variable", file=sys.stderr) + return 1 + + try: + send_post_request(url=args.url, channel=args.channel, text=args.text) + except urllib.error.HTTPError as exc: # pragma: no cover - network failure path + print(f"[ERR] HTTP error {exc.code}: {exc.reason}", file=sys.stderr) + return 1 + except urllib.error.URLError as exc: # pragma: no cover - network failure path + print(f"[ERR] URL error: {exc.reason}", file=sys.stderr) + return 1 + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) + diff --git a/ci/dvp-e2e/values.yaml b/ci/dvp-e2e/values.yaml new file mode 100644 index 0000000000..3379c93532 --- /dev/null +++ b/ci/dvp-e2e/values.yaml @@ -0,0 +1,69 @@ +storageProfiles: + default: + controlPlane: + root: linstor-thin-r2 + etcd: linstor-thin-r2 + workers: + root: linstor-thin-r2 + infra: + nfs: nfs-4-1-wffc + dvcr: linstor-thin-r2 + virtualDisks: + os: linstor-thin-r2 + data: nfs-4-1-wffc + cephrbd: + controlPlane: + root: ceph-pool-r2-csi-rbd + etcd: ceph-pool-r2-csi-rbd + workers: + root: ceph-pool-r2-csi-rbd + infra: + nfs: nfs-4-1-wffc + dvcr: ceph-pool-r2-csi-rbd + virtualDisks: + os: ceph-pool-r2-csi-rbd + data: ceph-pool-r2-csi-rbd + sds-local: + controlPlane: + root: sds-local-storage + etcd: sds-local-storage + workers: + root: sds-local-storage + infra: + nfs: nfs-4-1-wffc + dvcr: sds-local-storage + virtualDisks: + os: sds-local-storage + data: sds-local-storage +domain: e2e.virtlab.flant.com +clusterConfigurationPrefix: e2e +deckhouse: + tag: main + kubernetesVersion: Automatic +virtualization: + tag: main +features: + virtualization: true +image: + url: https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/ubuntu/noble-server-cloudimg-amd64.img + defaultUser: ubuntu + bootloader: EFI +ingressHosts: + - api + - grafana + - dex + - 
prometheus + - console + - virtualization +instances: + masterNodes: + count: 1 + cores: 8 + coreFraction: 50% + memory: 20Gi + additionalNodes: + - name: worker + count: 3 + cores: 6 + coreFraction: 50% + memory: 12Gi diff --git a/tests/e2e/Taskfile.yaml b/tests/e2e/Taskfile.yaml index 6ff25822bb..7ae7b8eb8c 100644 --- a/tests/e2e/Taskfile.yaml +++ b/tests/e2e/Taskfile.yaml @@ -81,7 +81,10 @@ tasks: --timeout=3h \ {{end -}} {{if .FOCUS -}} - --focus "{{ .FOCUS }}" + --focus "{{ .FOCUS }}" \ + {{end -}} + {{if .JUNIT_PATH -}} + --junit-report="{{ .JUNIT_PATH }}" {{end -}} cleanup: desc: "Cleanup namespaces & resources left from e2e tests"