From 846a4bfb683ec755e5ca6d2880b551d2de4e621e Mon Sep 17 00:00:00 2001 From: Marius Cornea Date: Tue, 13 Jan 2026 19:46:54 +0200 Subject: [PATCH 1/3] Add deployment script for QUADS self-scheduling hw Add an interactive deployment script that combines QUADS self-service lab assignment with jetlag OCP deployment by integrating ansible-quads-ssm playbook with jetlag repo. Features: - Makefile-driven workflow with phased targets (repos, assignment, inventory, bastion, cluster) - Interactive configuration prompts for QUADS credentials, OCP version, cluster type (MNO/SNO), and network stack (IPv4/IPv6/dual) - Automatic r630 server detection to enable iDRAC reset when needed - Assignment state tracking for resuming failed deployments Signed-off-by: Marius Cornea --- .gitignore | 4 + scripts/self-sched-deploy/Makefile | 388 ++++++++++++++++++ scripts/self-sched-deploy/README.md | 160 ++++++++ scripts/self-sched-deploy/check-r630.sh | 66 +++ scripts/self-sched-deploy/prompt-config.sh | 382 +++++++++++++++++ .../templates/quads_config.yml.j2 | 15 + 6 files changed, 1015 insertions(+) create mode 100644 scripts/self-sched-deploy/Makefile create mode 100644 scripts/self-sched-deploy/README.md create mode 100755 scripts/self-sched-deploy/check-r630.sh create mode 100755 scripts/self-sched-deploy/prompt-config.sh create mode 100644 scripts/self-sched-deploy/templates/quads_config.yml.j2 diff --git a/.gitignore b/.gitignore index 9c186b89..eb61c085 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,7 @@ gen .idea/ .idea/workspace.xml +# Self-sched-deploy generated files +scripts/self-sched-deploy/vars/config.env +scripts/self-sched-deploy/vars/state.env +scripts/self-sched-deploy/repos/ diff --git a/scripts/self-sched-deploy/Makefile b/scripts/self-sched-deploy/Makefile new file mode 100644 index 00000000..2375320f --- /dev/null +++ b/scripts/self-sched-deploy/Makefile @@ -0,0 +1,388 @@ +# Makefile for jetlag self-sched-deploy +# Interactive OCP deployment with QUADS lab 
assignment
+
+SHELL := /bin/bash
+.SHELLFLAGS := -e -o pipefail -c
+.PHONY: all deploy configure bootstrap repos create-assignment create-assignment-run inventory inventory-run bastion bastion-run cluster cluster-run clean clean-all status terminate-assignment help
+.NOTPARALLEL: # phases rely on prerequisite order and read from the terminal, so they must never run concurrently under -j
+# Paths (all derived from the working directory: run make from scripts/self-sched-deploy, e.g. via `cd` or `make -C`)
+SCRIPT_DIR := $(CURDIR)
+JETLAG_ROOT := $(abspath $(CURDIR)/../..)
+
+# Virtual environment activation (must be after JETLAG_ROOT)
+ACTIVATE_VENV := source $(JETLAG_ROOT)/.ansible/bin/activate
+CONFIG_FILE := $(SCRIPT_DIR)/vars/config.env
+STATE_FILE := $(SCRIPT_DIR)/vars/state.env
+REPOS_DIR := $(SCRIPT_DIR)/repos
+QUADS_REPO := $(REPOS_DIR)/ansible-quads-ssm
+
+# Colors (ANSI escape sequences, interpreted by `echo -e` in the recipes)
+RED := \033[0;31m
+GREEN := \033[0;32m
+YELLOW := \033[1;33m
+BLUE := \033[0;36m
+NC := \033[0m
+
+#------------------------------------------------------------------------------
+# Main targets
+#------------------------------------------------------------------------------
+
+all: deploy
+
+# Full deployment: prompt for ALL config upfront, then run every phase through its non-prompting *-run target
+deploy: configure bootstrap repos create-assignment-run inventory-run bastion-run cluster-run
+	@echo ""
+	@echo -e "$(GREEN)=============================================$(NC)"
+	@echo -e "$(GREEN)Deployment Complete!$(NC)"
+	@echo -e "$(GREEN)=============================================$(NC)"
+	@if [ -f "$(STATE_FILE)" ]; then \
+		source "$(STATE_FILE)" && \
+		echo "Cloud: $$CLOUD_NAME" && \
+		echo "Cluster config: $(JETLAG_ROOT)/ansible/$$CLOUD_NAME" && \
+		echo "" && \
+		echo "Access your cluster:" && \
+		echo "export KUBECONFIG=$(JETLAG_ROOT)/ansible/kubeconfig"; \
+	fi
+
+# Configure: prompt for all parameters upfront (prompt-config.sh in "all" mode)
+configure:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Configuration$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@"$(SCRIPT_DIR)/prompt-config.sh" all
+
+#------------------------------------------------------------------------------
+# Phase: Bootstrap Jetlag Environment
+#------------------------------------------------------------------------------
+# bootstrap: run jetlag's bootstrap.sh unless the virtualenv activation script already exists
+bootstrap:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Phase: Bootstrap Jetlag Environment$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@if [ ! -f "$(JETLAG_ROOT)/.ansible/bin/activate" ]; then \
+		echo "Running jetlag bootstrap..."; \
+		cd $(JETLAG_ROOT) && ./bootstrap.sh; \
+	else \
+		echo "Jetlag environment already bootstrapped."; \
+	fi
+	@echo -e "$(GREEN)Bootstrap complete.$(NC)"
+
+#------------------------------------------------------------------------------
+# Phase: Clone/Update Repositories
+#------------------------------------------------------------------------------
+# repos: clone ansible-quads-ssm into the repos directory, or `git pull` it when it is already cloned
+repos:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Phase: Clone/Update Repositories$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@mkdir -p $(REPOS_DIR)
+	@if [ -d "$(QUADS_REPO)" ]; then \
+		echo "Updating ansible-quads-ssm..."; \
+		cd $(QUADS_REPO) && git pull; \
+	else \
+		echo "Cloning ansible-quads-ssm..."; \
+		git clone https://github.com/quadsproject/ansible-quads-ssm.git $(QUADS_REPO); \
+	fi
+	@echo -e "$(GREEN)Repository ready.$(NC)"
+
+#------------------------------------------------------------------------------
+# Phase: QUADS Create Assignment
+#------------------------------------------------------------------------------
+
+# User-facing target: prompts for QUADS config only, then re-invokes make for the non-prompting target
+create-assignment:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Phase: QUADS Create Assignment$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@$(SCRIPT_DIR)/prompt-config.sh quads
+	@$(MAKE) --no-print-directory create-assignment-run
+
+# Internal target: runs without prompting (config must exist). Renders quads_config.yml with envsubst, runs the QUADS playbook, scrapes cloud name and assignment id from its log into the state file, then polls the QUADS API every 30s (no timeout) until the assignment reports validated; a failed poll request falls through to the "could not check" branch instead of aborting the recipe.
+create-assignment-run:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Phase: QUADS Create Assignment$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@set -a && source $(CONFIG_FILE) && set +a && \
+	if [ "$$USE_EXISTING_ASSIGNMENT" = "true" ]; then \
+		echo -e "$(YELLOW)Using existing assignment: $$CLOUD_NAME$(NC)"; \
+	else \
+		echo "Generating QUADS configuration..."; \
+		envsubst < $(SCRIPT_DIR)/templates/quads_config.yml.j2 > $(QUADS_REPO)/quads_config.yml; \
+		echo "Running QUADS assignment..."; \
+		cd $(QUADS_REPO) && \
+		ansible-playbook quads_self_schedule.yml \
+			-e "workload_name='$$WORKLOAD_NAME'" \
+			-e "num_hosts=$$NUM_HOSTS" | tee /tmp/quads_output.log; \
+		CLOUD_NAME=$$(grep -oP '"cloud_name":\s*"\K[^"]+' /tmp/quads_output.log | head -1 || true); \
+		if [ -z "$$CLOUD_NAME" ]; then \
+			CLOUD_NAME=$$(grep -oP '"cloud":\s*\{"name":\s*"\K[^"]+' /tmp/quads_output.log | head -1 || true); \
+		fi; \
+		if [ -z "$$CLOUD_NAME" ]; then \
+			echo -e "$(RED)Error: Could not extract cloud_name from QUADS output$(NC)"; \
+			echo "Please check the output above and manually set CLOUD_NAME in $(STATE_FILE)"; \
+			exit 1; \
+		fi; \
+		ASSIGNMENT_ID=$$(grep -oP '"assignment_id":\s*"\K[^"]+' /tmp/quads_output.log | head -1 || true); \
+		if [ -z "$$ASSIGNMENT_ID" ]; then \
+			ASSIGNMENT_ID=$$(grep -oP '"assignment_id":\s*\K[0-9]+' /tmp/quads_output.log | head -1 || true); \
+		fi; \
+		echo "CLOUD_NAME=$$CLOUD_NAME" > $(STATE_FILE); \
+		echo "ASSIGNMENT_ID=$$ASSIGNMENT_ID" >> $(STATE_FILE); \
+		echo -e "$(GREEN)Assignment scheduled: $$CLOUD_NAME (ID: $$ASSIGNMENT_ID)$(NC)"; \
+		if [ -n "$$ASSIGNMENT_ID" ]; then \
+			echo "Waiting for assignment validation..."; \
+			QUADS_HOST="$$QUADS_API_SERVER"; \
+			while true; do \
+				STATUS=$$(curl -s "https://$$QUADS_HOST/api/v3/assignments/$$ASSIGNMENT_ID" | jq -r '.validated | tostring' || true); \
+				if [ "$$STATUS" = "true" ]; then \
+					echo -e "$(GREEN)Assignment validated and ready!$(NC)"; \
+					break; \
+				elif [ "$$STATUS" = "null" ] || [ -z "$$STATUS" ]; then \
+					echo -e "$(YELLOW)Could not check validation status, proceeding anyway...$(NC)"; \
+					break; \
+				fi; \
+				echo "Waiting for validation... (status: $$STATUS)"; \
+				sleep 30; \
+			done; \
+		fi; \
+		echo -e "$(GREEN)Assignment complete: $$CLOUD_NAME$(NC)"; \
+	fi
+
+#------------------------------------------------------------------------------
+# Phase: Generate Jetlag Inventory
+#------------------------------------------------------------------------------
+
+# User-facing target: prompts for jetlag config only, then re-invokes make for the non-prompting target
+inventory:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Phase: Generate Jetlag Inventory$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@$(SCRIPT_DIR)/prompt-config.sh jetlag
+	@$(MAKE) --no-print-directory inventory-run
+
+# Internal target: runs without prompting (config and state must exist). Rewrites ansible/vars/all.yml from the sample with sed, copies the pull secret, runs create-inventory.yml inside the venv and copies the SSH key to the bastion (password passed through SSHPASS, not argv). Aborts, instead of reporting success, when config/state cannot be sourced or the cd/venv/playbook step fails.
+inventory-run:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Phase: Generate Jetlag Inventory$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@set -a && source $(CONFIG_FILE) && source $(STATE_FILE) && set +a || exit 1; \
+	echo "Checking for r630 servers..."; \
+	RESET_IDRAC="false"; \
+	if $(SCRIPT_DIR)/check-r630.sh "$$LAB" "$$CLOUD_NAME" "$$QUADS_API_SERVER" | grep -q "r630"; then \
+		echo -e "$(YELLOW)Detected r630 servers - enabling reset_idrac$(NC)"; \
+		RESET_IDRAC="true"; \
+	fi; \
+	echo "Generating jetlag configuration..."; \
+	cp $(JETLAG_ROOT)/ansible/vars/all.sample.yml $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	sed -i "s/^lab:$$/lab: $$LAB/" $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	sed -i "s/^lab_cloud:$$/lab_cloud: $$CLOUD_NAME/" $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	sed -i "s/^cluster_type:$$/cluster_type: $$CLUSTER_TYPE/" $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	sed -i "s/^worker_node_count:$$/worker_node_count: $$WORKER_NODE_COUNT/" $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	sed -i "s/^ocp_build: .*/ocp_build: \"$$OCP_BUILD\"/" $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	sed -i "s/^ocp_version: .*/ocp_version: \"$$OCP_VERSION\"/" $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	if [ "$$NETWORK_STACK" = "ipv4" ]; then \
+		sed -i 's/^setup_bastion_registry: true$$/setup_bastion_registry: false/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^use_bastion_registry: true$$/use_bastion_registry: false/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	elif [ "$$NETWORK_STACK" = "ipv6" ]; then \
+		sed -i 's/^- 198.18.0.0\/16$$/- fd00:198:18:10::\/64/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^- 16$$/- 64/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^- 10.128.0.0\/14$$/- fd01::\/48/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^- 23$$/- 64/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^- 172.30.0.0\/16$$/- fd02::\/112/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^setup_bastion_registry: false$$/setup_bastion_registry: true/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^use_bastion_registry: false$$/use_bastion_registry: true/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	elif [ "$$NETWORK_STACK" = "dual" ]; then \
+		sed -i '/^- 198.18.0.0\/16$$/a - fd00:198:18:10::\/64' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i '/^- 16$$/a - 64' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i '/^- 10.128.0.0\/14$$/a - fd01::\/48' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i '/^- 23$$/a - 64' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i '/^- 172.30.0.0\/16$$/a - fd02::\/112' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^setup_bastion_registry: true$$/setup_bastion_registry: false/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+		sed -i 's/^use_bastion_registry: true$$/use_bastion_registry: false/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	fi; \
+	if [ "$$RESET_IDRAC" = "true" ]; then \
+		sed -i 's/^# reset_idrac: false$$/reset_idrac: true/' $(JETLAG_ROOT)/ansible/vars/all.yml; \
+	fi; \
+	if [ ! -f "$$PULL_SECRET_PATH" ]; then \
+		echo -e "$(RED)Error: Pull secret not found at $$PULL_SECRET_PATH$(NC)"; \
+		exit 1; \
+	fi; \
+	if [ "$$(realpath "$$PULL_SECRET_PATH")" != "$$(realpath $(JETLAG_ROOT)/pull-secret.txt)" ]; then \
+		echo "Copying pull secret to jetlag root..."; \
+		cp "$$PULL_SECRET_PATH" $(JETLAG_ROOT)/pull-secret.txt; \
+	else \
+		echo "Pull secret already in jetlag root."; \
+	fi; \
+	echo "Running jetlag create-inventory..."; \
+	cd $(JETLAG_ROOT) && \
+	$(ACTIVATE_VENV) && ansible-playbook ansible/create-inventory.yml || exit 1; \
+	echo -e "$(GREEN)Inventory generated: $(JETLAG_ROOT)/ansible/inventory/$$CLOUD_NAME.local$(NC)"; \
+	echo ""; \
+	echo "Copying SSH key to bastion host..."; \
+	BASTION_HOST=$$(grep -A1 '^\[bastion\]' $(JETLAG_ROOT)/ansible/inventory/$$CLOUD_NAME.local | tail -1 | awk '{print $$1}'); \
+	if [ -n "$$BASTION_HOST" ] && [ -n "$$BASTION_ROOT_PASSWORD" ]; then \
+		echo "Bastion host: $$BASTION_HOST"; \
+		SSHPASS="$$BASTION_ROOT_PASSWORD" sshpass -e ssh-copy-id -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@$$BASTION_HOST 2>/dev/null && \
+		echo -e "$(GREEN)SSH key copied to bastion successfully.$(NC)" || \
+		echo -e "$(YELLOW)Warning: Could not copy SSH key. You may need to do this manually.$(NC)"; \
+	else \
+		echo -e "$(YELLOW)Warning: Bastion host or password not available. SSH key not copied.$(NC)"; \
+	fi
+
+#------------------------------------------------------------------------------
+# Phase: Setup Bastion
+#------------------------------------------------------------------------------
+
+# User-facing target (no prompts needed, uses existing config)
+bastion: bastion-run
+
+# Internal target: fails (rather than printing the success banner) when config/state cannot be sourced, the venv cannot be activated, or the playbook fails
+bastion-run:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Phase: Setup Bastion$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@source $(CONFIG_FILE) && \
+	source $(STATE_FILE) || exit 1; \
+	echo "Running jetlag setup-bastion..."; \
+	cd $(JETLAG_ROOT) && \
+	$(ACTIVATE_VENV) && ansible-playbook -i ansible/inventory/$$CLOUD_NAME.local ansible/setup-bastion.yml || exit 1; \
+	echo -e "$(GREEN)Bastion setup complete.$(NC)"
+
+#------------------------------------------------------------------------------
+# Phase: Deploy OCP Cluster
+#------------------------------------------------------------------------------
+
+# User-facing target (no prompts needed, uses existing config)
+cluster: cluster-run
+
+# Internal target: runs the playbook named by DEPLOY_PLAYBOOK from the saved config; fails (rather than printing the success banner) when config/state cannot be sourced, the venv cannot be activated, or the playbook fails
+cluster-run:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Phase: Deploy OCP Cluster$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@source $(CONFIG_FILE) && \
+	source $(STATE_FILE) || exit 1; \
+	echo "Running jetlag $$DEPLOY_PLAYBOOK..."; \
+	cd $(JETLAG_ROOT) && \
+	$(ACTIVATE_VENV) && ansible-playbook -i ansible/inventory/$$CLOUD_NAME.local ansible/$$DEPLOY_PLAYBOOK || exit 1; \
+	echo -e "$(GREEN)Cluster deployment complete!$(NC)"; \
+	echo ""; \
+	echo "Access your cluster:"; \
+	echo "export KUBECONFIG=$(JETLAG_ROOT)/ansible/kubeconfig"
+
+#------------------------------------------------------------------------------
+# Utility targets
+#------------------------------------------------------------------------------
+# status: print the state file and the config file (config lines containing PASSWORD are filtered out)
+status:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Deployment Status$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@if [ -f "$(STATE_FILE)" ]; then \
+		echo "State file: $(STATE_FILE)"; \
+		cat $(STATE_FILE); \
+	else \
+		echo "No state file found. No active assignment."; \
+	fi
+	@echo ""
+	@if [ -f "$(CONFIG_FILE)" ]; then \
+		echo "Config file: $(CONFIG_FILE)"; \
+		grep -v PASSWORD $(CONFIG_FILE) || true; \
+	else \
+		echo "No config file found."; \
+	fi
+# terminate-assignment: log in to the QUADS API with the saved credentials and POST a terminate request for the saved ASSIGNMENT_ID. TLS verification stays on for both requests (the login already requires a verifiable certificate), and a failed request aborts before the success message.
+terminate-assignment:
+	@echo ""
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@echo -e "$(BLUE)Terminate QUADS Assignment$(NC)"
+	@echo -e "$(BLUE)=============================================$(NC)"
+	@if [ ! -f "$(STATE_FILE)" ]; then \
+		echo -e "$(RED)Error: No state file found. Nothing to release.$(NC)"; \
+		exit 1; \
+	fi
+	@if [ ! -f "$(CONFIG_FILE)" ]; then \
+		echo -e "$(RED)Error: No config file found. Cannot authenticate with QUADS.$(NC)"; \
+		exit 1; \
+	fi
+	@source $(CONFIG_FILE) && \
+	source $(STATE_FILE) && \
+	if [ -z "$$ASSIGNMENT_ID" ]; then \
+		echo -e "$(RED)Error: No ASSIGNMENT_ID found in state file.$(NC)"; \
+		exit 1; \
+	fi; \
+	echo "Logging in to QUADS..."; \
+	QUADS_EMAIL="$$QUADS_USERNAME@$$QUADS_USER_DOMAIN"; \
+	TOKEN=$$(curl -s -X POST "https://$$QUADS_API_SERVER/api/v3/login/" \
+		-u "$$QUADS_EMAIL:$$QUADS_PASSWORD" | jq -r '.auth_token // empty' || true); \
+	if [ -z "$$TOKEN" ]; then \
+		echo -e "$(RED)Error: Failed to authenticate with QUADS.$(NC)"; \
+		exit 1; \
+	fi; \
+	echo "Terminating assignment $$ASSIGNMENT_ID (cloud: $$CLOUD_NAME)..."; \
+	RESULT=$$(curl -sS --fail -X POST \
+		-H "Authorization: Bearer $$TOKEN" \
+		"https://$$QUADS_API_SERVER/api/v3/assignments/terminate/$$ASSIGNMENT_ID") || { echo -e "$(RED)Error: Terminate request failed; assignment was NOT released.$(NC)"; exit 1; }; \
+	echo "Response: $$RESULT"; \
+	echo -e "$(GREEN)Allocation released.$(NC)"; \
+	echo ""; \
+	echo "Run 'make clean' to remove local state files."
+# clean: delete the generated config and state files and the QUADS playbook log; the cloned repository is kept
+clean:
+	@echo ""
+	@echo -e "$(YELLOW)Cleaning generated files...$(NC)"
+	@rm -f $(CONFIG_FILE)
+	@rm -f $(STATE_FILE)
+	@rm -f /tmp/quads_output.log
+	@echo -e "$(GREEN)Clean complete.$(NC)"
+	@echo ""
+	@echo "Note: Cloned repository in $(REPOS_DIR) was not removed."
+	@echo "To remove it: rm -rf $(REPOS_DIR)"
+# clean-all: run clean, then also delete the directory holding the cloned repository
+clean-all: clean
+	@echo -e "$(YELLOW)Removing cloned repository...$(NC)"
+	@rm -rf $(REPOS_DIR)
+	@echo -e "$(GREEN)All cleaned.$(NC)"
+# help: print the list of targets and a few example invocations
+help:
+	@echo ""
+	@echo -e "$(BLUE)jetlag self-sched-deploy - Interactive OCP Deployment$(NC)"
+	@echo ""
+	@echo "Usage: make [target]"
+	@echo ""
+	@echo "Main targets:"
+	@echo " all, deploy Full deployment pipeline (prompts for all config upfront)"
+	@echo " configure Prompt for all configuration parameters"
+	@echo ""
+	@echo "Individual phases (prompt for phase-specific config):"
+	@echo " bootstrap Setup jetlag Python virtual environment"
+	@echo " repos Clone/update ansible-quads-ssm repository"
+	@echo " create-assignment QUADS assignment (prompts for QUADS config)"
+	@echo " inventory Generate jetlag inventory (prompts for OCP config)"
+	@echo " bastion Setup bastion node"
+	@echo " cluster Deploy OCP cluster"
+	@echo ""
+	@echo "Utility targets:"
+	@echo " status Show current deployment status"
+	@echo " terminate-assignment Terminate QUADS assignment"
+	@echo " clean Remove generated config and state files"
+	@echo " clean-all Remove config, state, and cloned repository"
+	@echo " help Show this help message"
+	@echo ""
+	@echo "Examples:"
+	@echo " make # Full interactive deployment"
+	@echo " make create-assignment # Run only QUADS assignment"
+	@echo " make inventory # Run only inventory generation"
+	@echo " make bastion # Run only bastion setup"
+	@echo " make cluster # Run only cluster deployment"
+	@echo ""
diff --git a/scripts/self-sched-deploy/README.md b/scripts/self-sched-deploy/README.md
new file mode 100644
index 00000000..7299bd60
--- /dev/null
+++ b/scripts/self-sched-deploy/README.md
@@ -0,0 +1,160 @@ +# Self-Scheduling OCP Deployment + +Interactive OCP deployment with QUADS self-scheduling lab assignment. + +This tool provides an easy way to deploy an OpenShift cluster on Red Hat labs (Scale Lab, Performance Lab) by combining: +- **ansible-quads-ssm**: Self-service lab assignment via QUADS API +- **jetlag**: Assisted Installer-based OCP deployment + +## Prerequisites + +- **Ansible**: Version 2.9+ +- **GNU Make**: For running the Makefile +- **jq**: For JSON parsing (r630 server detection) +- **Pull Secret**: Download from [console.redhat.com](https://console.redhat.com/openshift/install/pull-secret) +- **SSH Keys**: Default `~/.ssh/id_rsa` and `~/.ssh/id_rsa.pub` +- **QUADS Account**: Access to the QUADS self-scheduling system + +## Quick Start + +```bash +# Navigate to the self-sched-deploy directory +cd scripts/self-sched-deploy + +# Run full interactive deployment +make +``` + +The tool will: +1. Prompt for all required configuration +2. Clone/update the ansible-quads-ssm repository +3. Create QUADS assignment for lab hosts +4. Generate jetlag inventory +5. Setup the bastion node +6. 
Deploy the OCP cluster + +## Usage + +### Full Deployment + +```bash +# Interactive deployment (prompts for all configuration) +make +``` + +### Individual Phases + +Each phase prompts for required configuration: + +```bash +make repos # Clone/update ansible-quads-ssm repository +make create-assignment # QUADS assignment +make inventory # Generate jetlag inventory +make bastion # Setup bastion node +make cluster # Deploy OCP cluster +``` + +### Re-running Phases + +To re-run specific phases after failures, simply run the individual target: + +```bash +# Re-run cluster deployment only +make cluster + +# Re-run bastion setup and cluster deployment +make bastion && make cluster +``` + +### Utility Commands + +```bash +make status # Show current deployment status +make clean # Remove generated config and state files +make clean-all # Also remove cloned repository +make help # Show all available targets +``` + +## Configuration Options + +The interactive prompts collect the following: + +### QUADS Configuration +- **API Server**: QUADS server URL (e.g., `quads2.rdu2.scalelab.redhat.com`) +- **Username**: Your username (without domain) +- **User Domain**: Email domain (e.g., `redhat.com`) +- **Password**: QUADS password + +### Lab Configuration +- **Lab**: `scalelab` or `performancelab` + +### OCP Configuration +- **Build Type**: `ga` (General Availability), `dev` (Development), or `ci` (Continuous Integration) +- **Version**: OCP version (e.g., `latest-4.17`, `candidate-4.17`, `4.19.0-0.nightly-2025-02-25-035256`) + +### Cluster Configuration +- **Cluster Type**: `mno` (Multi-Node OpenShift) or `sno` (Single-Node OpenShift) +- **Worker Count**: Number of worker nodes (MNO only) +- **Network Stack**: `ipv4`, `ipv6`, or `dual` (dual-stack) + +### Paths +- **Pull Secret**: Path to your `pull-secret.txt` file (default: jetlag root) + +## File Structure + +``` +scripts/self-sched-deploy/ +├── Makefile # Main orchestration +├── prompt-config.sh # Interactive configuration 
+├── check-r630.sh # r630 server detection script +├── vars/ +│ ├── config.env # Generated: current run config +│ └── state.env # Generated: assignment state +├── templates/ +│ └── quads_config.yml.j2 # QUADS config template +├── repos/ # Cloned repositories (gitignored) +│ └── ansible-quads-ssm/ +└── README.md +``` + +The jetlag configuration is generated by copying `ansible/vars/all.sample.yml` and modifying it with `sed` based on user configuration. + +## Automatic Assignment Detection + +When you run a target that prompts for QUADS configuration (`make`, `make deploy`, `make configure`, `make create-assignment`), the tool checks for an existing assignment in `vars/state.env`. If found, it prompts whether to reuse the existing assignment or create a new one. This allows you to: + +1. Resume a failed deployment without creating a new assignment +2. Re-run specific phases after fixing issues +3. Iterate on cluster configuration without waiting for new hosts + +## Network Stacks + +### IPv4 Single-Stack (default) +Standard IPv4-only deployment. No special requirements. + +### IPv6 Single-Stack +IPv6-only deployment. Automatically enables bastion registry for disconnected installation. + +### Dual-Stack +Both IPv4 and IPv6. Standard connected installation using the IPv4 network. + +## Automatic Features + +### r630 Server Detection +During inventory generation, the tool automatically detects if any allocated servers are Dell r630 models. If detected, it enables `reset_idrac: true` in the jetlag configuration to clear iDRAC job queues and reset the iDRAC service before deployment. + +## Troubleshooting + +### Assignment Failed +Check your QUADS credentials and ensure you have access to self-scheduling. + +### Inventory Generation Failed +Verify the cloud name in `vars/state.env` matches an active QUADS assignment. + +### Cluster Deployment Failed +Re-run with: +```bash +make cluster +``` + +This target does not prompt; it reuses the saved configuration and the existing assignment recorded in `vars/state.env`.
diff --git a/scripts/self-sched-deploy/check-r630.sh b/scripts/self-sched-deploy/check-r630.sh
new file mode 100755
index 00000000..5f16335d
--- /dev/null
+++ b/scripts/self-sched-deploy/check-r630.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Check if allocated servers include r630 models
+# Downloads ocpinventory.json from QUADS and parses pm_addr fields
+#
+# Usage: ./check-r630.sh <lab> <cloud_name> [quads_server]
+# Returns: "r630" and exit 0 if r630 found, "none" and exit 1 if not, exit 2 on usage/dependency/download/parse errors
+
+set -eo pipefail
+
+LAB="${1:-scalelab}"
+CLOUD_NAME="${2}"
+QUADS_SERVER="${3}"
+
+if [[ -z "$CLOUD_NAME" ]]; then
+    echo "Usage: $0 <lab> <cloud_name> [quads_server]" >&2
+    exit 2
+fi
+
+# Check for required dependencies
+if ! command -v jq &> /dev/null; then
+    echo "Error: jq is required but not installed" >&2
+    exit 2
+fi
+
+# Use provided QUADS server or map from lab
+if [[ -n "$QUADS_SERVER" ]]; then
+    QUADS_HOST="$QUADS_SERVER"
+else
+    # Map lab to QUADS server (from jetlag ansible/vars/lab.yml)
+    case "$LAB" in
+        scalelab)
+            QUADS_HOST="quads2.rdu2.scalelab.redhat.com"
+            ;;
+        performancelab)
+            QUADS_HOST="quads2.rdu3.labs.perfscale.redhat.com"
+            ;;
+        *)
+            echo "Unknown lab: $LAB" >&2
+            exit 2
+            ;;
+    esac
+fi
+
+# Download ocpinventory.json; -f turns HTTP errors into an empty result and "|| true" lets the check below report it with exit 2
+INVENTORY_URL="http://${QUADS_HOST}/instack/${CLOUD_NAME}_ocpinventory.json"
+INVENTORY_JSON=$(curl -sf "$INVENTORY_URL" || true)
+
+if [[ -z "$INVENTORY_JSON" || "$INVENTORY_JSON" == "null" ]]; then
+    echo "Failed to download inventory from $INVENTORY_URL" >&2
+    exit 2
+fi
+
+# Extract server models from pm_addr fields (a jq failure is an error, not "none")
+# Pattern: mgmt-[rack]-[unit]-[model].domain → extract [model]
+MODELS=$(echo "$INVENTORY_JSON" | jq -r '.nodes[].pm_addr' | \
+    sed 's/\..*//' | \
+    sed 's/.*-//') || { echo "Failed to parse inventory from $INVENTORY_URL" >&2; exit 2; }
+
+# Check for r630 (here-string instead of a pipe so pipefail cannot turn a match into a failure)
+if grep -q "^r630$" <<< "$MODELS"; then
+    echo "r630"
+    exit 0
+fi
+
+echo "none"
+exit 1
diff --git a/scripts/self-sched-deploy/prompt-config.sh b/scripts/self-sched-deploy/prompt-config.sh
new file mode 100755
index 00000000..0d4faba2
--- /dev/null
+++
b/scripts/self-sched-deploy/prompt-config.sh
@@ -0,0 +1,382 @@
+#!/bin/bash
+# Interactive configuration prompts for jetlag self-sched-deploy
+# This script collects user input and outputs vars/config.env
+#
+# Usage: prompt-config.sh [mode]
+#   mode: quads - Only QUADS-related prompts (for create-assignment)
+#         jetlag - Only jetlag-related prompts (for inventory)
+#         all - All prompts (default)
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+JETLAG_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
+CONFIG_FILE="${SCRIPT_DIR}/vars/config.env"
+STATE_FILE="${SCRIPT_DIR}/vars/state.env"
+
+# Mode parameter
+MODE="${1:-all}"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;36m'
+NC='\033[0m' # No Color
+
+print_header() {
+    echo ""
+    echo -e "${BLUE}=============================================${NC}"
+    echo -e "${BLUE}$1${NC}"
+    echo -e "${BLUE}=============================================${NC}"
+}
+
+print_info() {
+    echo -e "${GREEN}$1${NC}"
+}
+
+print_warning() {
+    echo -e "${YELLOW}$1${NC}"
+}
+
+print_error() {
+    echo -e "${RED}$1${NC}"
+}
+
+# Prompt with default value: prompt_with_default <prompt> <default> <var_name> [is_password]. Stores the answer (or the default on empty input) in the variable named <var_name>; input is not echoed when is_password is "true".
+prompt_with_default() {
+    local prompt="$1"
+    local default="$2"
+    local var_name="$3"
+    local is_password="${4:-false}" value
+
+    if [[ "$is_password" == "true" ]]; then
+        read -r -s -p "$prompt [$default]: " value
+        echo ""
+    else
+        read -r -p "$prompt [$default]: " value
+    fi
+
+    value="${value:-$default}"
+    printf -v "$var_name" '%s' "$value"  # no eval: the answer is stored verbatim, quotes and all
+}
+
+# Prompt with options: prompt_with_options <prompt> <options> <default> <var_name>. The options text is only displayed; the answer is not validated against it.
+prompt_with_options() {
+    local prompt="$1"
+    local options="$2"
+    local default="$3"
+    local var_name="$4" value
+
+    echo "$prompt"
+    echo "Options: $options"
+    read -r -p "Enter choice [$default]: " value
+    value="${value:-$default}"
+    printf -v "$var_name" '%s' "$value"  # no eval: the answer is stored verbatim
+}
+
+# Check for existing state: when the state file names a cloud, ask whether to reuse it and export USE_EXISTING_ASSIGNMENT accordingly
+check_existing_state() {
+    if [[ -f "$STATE_FILE" ]]; then
+        # shellcheck source=/dev/null
+        source "$STATE_FILE"
+        if [[ -n "$CLOUD_NAME" ]]; then
+            print_warning "Found existing assignment: $CLOUD_NAME"
+            read -r -p "Use existing assignment? (y/n) [y]: " use_existing
+            use_existing="${use_existing:-y}"
+            if [[ "$use_existing" =~ ^[Yy] ]]; then
+                export USE_EXISTING_ASSIGNMENT="true"
+                return 0
+            fi
+        fi
+    fi
+    export USE_EXISTING_ASSIGNMENT="false"
+    return 0
+}
+
+# Load existing config if present (its values become the defaults offered by the prompts)
+load_existing_config() {
+    if [[ -f "$CONFIG_FILE" ]]; then
+        # shellcheck source=/dev/null
+        source "$CONFIG_FILE"
+    fi
+}
+
+# QUADS-related prompts (for create-assignment): lab, credentials, cluster type/size, workload description
+collect_quads_config() {
+    print_header "Lab Configuration"
+
+    prompt_with_options "Select Lab" "scalelab, performancelab" "${LAB:-scalelab}" LAB
+
+    # Auto-configure QUADS API server based on lab; an unknown lab is rejected instead of keeping a stale or empty server
+    case "$LAB" in
+        scalelab) QUADS_API_SERVER="quads2.rdu2.scalelab.redhat.com" ;;
+        performancelab) QUADS_API_SERVER="quads2.rdu3.labs.perfscale.redhat.com" ;;
+        *)
+            print_error "Unknown lab: $LAB (expected scalelab or performancelab)"
+            exit 1
+            ;;
+    esac
+    print_info "QUADS Server: $QUADS_API_SERVER"
+
+    print_header "QUADS Credentials"
+
+    prompt_with_default "QUADS Username (without domain)" "${QUADS_USERNAME:-}" QUADS_USERNAME
+    prompt_with_default "QUADS User Domain" "${QUADS_USER_DOMAIN:-redhat.com}" QUADS_USER_DOMAIN
+    prompt_with_default "QUADS Password" "" QUADS_PASSWORD true
+
+    print_header "Cluster Configuration"
+
+    prompt_with_options "Cluster Type" "mno (Multi-Node), sno (Single-Node)" "${CLUSTER_TYPE:-mno}" CLUSTER_TYPE
+
+    if [[ "$CLUSTER_TYPE" == "mno" ]]; then
+        prompt_with_default "Worker Node Count" "${WORKER_NODE_COUNT:-2}" WORKER_NODE_COUNT
+        [[ "$WORKER_NODE_COUNT" =~ ^[0-9]+$ ]] || { print_error "Worker node count must be a non-negative integer"; exit 1; }
+        NUM_HOSTS=$((1 + 3 + WORKER_NODE_COUNT))  # 1 bastion + 3 controlplane + workers
+    else
+        WORKER_NODE_COUNT=0
+        # SNO: 1 bastion + 1 SNO node
+        NUM_HOSTS=2
+    fi
+
+    print_info "Auto-calculated hosts to reserve: $NUM_HOSTS"
+    prompt_with_default "Number of hosts to reserve (override if needed)" "$NUM_HOSTS" NUM_HOSTS
+
+    print_header "Workload Description"
+
+    prompt_with_default "Workload description for QUADS" "${WORKLOAD_NAME:-OCP ${CLUSTER_TYPE} cluster}" WORKLOAD_NAME
+}
+
+#
Jetlag-related prompts (for inventory)
+collect_jetlag_config() {
+    print_header "OCP Configuration"
+
+    prompt_with_options "OCP Build Type" "ga, dev, ci" "${OCP_BUILD:-ga}" OCP_BUILD
+
+    case "$OCP_BUILD" in
+        ga)
+            default_version="latest-4.20"
+            echo "For GA builds: latest-4.20, 4.20.1, latest-4.19, 4.19.5, etc."
+            ;;
+        dev)
+            default_version="candidate-4.20"
+            echo "For dev builds: candidate-4.20, candidate-4.19, latest"
+            ;;
+        ci)
+            default_version="4.20.0-0.nightly-2025-02-25-035256"
+            echo "For CI builds: 4.20.0-0.nightly-YYYY-MM-DD-HHMMSS"
+            ;;
+    esac
+    prompt_with_default "OCP Version" "${OCP_VERSION:-$default_version}" OCP_VERSION
+
+    print_header "Network Configuration"
+
+    echo "1) ipv4 - IPv4 single-stack"
+    echo "2) ipv6 - IPv6 single-stack"
+    echo "3) dual - Dual-stack (IPv4 + IPv6)"
+    prompt_with_options "Select Network Stack" "ipv4, ipv6, dual" "${NETWORK_STACK:-ipv4}" NETWORK_STACK
+
+    # Normalize network stack input (menu numbers are accepted as aliases)
+    case "$NETWORK_STACK" in
+        1|ipv4) NETWORK_STACK="ipv4" ;;
+        2|ipv6) NETWORK_STACK="ipv6" ;;
+        3|dual) NETWORK_STACK="dual" ;;
+    esac
+
+    print_header "Pull Secret"
+
+    # Default pull secret path is in jetlag root
+    local default_pull_secret="${PULL_SECRET_PATH:-${JETLAG_ROOT}/pull-secret.txt}"
+    prompt_with_default "Path to pull-secret.txt" "$default_pull_secret" PULL_SECRET_PATH
+
+    # Validate pull secret exists (warning only; the script continues)
+    if [[ ! -f "$PULL_SECRET_PATH" ]]; then
+        print_error "Warning: Pull secret file not found at $PULL_SECRET_PATH"
+        print_error "Please ensure the file exists before running deployment."
+    fi
+
+    print_header "Bastion SSH Access"
+
+    echo "Enter the root password for the bastion host to copy your SSH key."
+    prompt_with_default "Bastion root password" "" BASTION_ROOT_PASSWORD true
+
+    # Set deploy playbook based on cluster type
+    if [[ "$CLUSTER_TYPE" == "mno" ]]; then
+        DEPLOY_PLAYBOOK="mno-deploy.yml"
+    else
+        DEPLOY_PLAYBOOK="sno-deploy.yml"
+    fi
+}
+
+# Save configuration to file. Every value is written shell-quoted (printf %q) so quotes, $, backticks or spaces in a password or path cannot corrupt the file or run code when it is sourced.
+save_config() {
+    print_header "Saving Configuration"
+    mkdir -p "$(dirname "$CONFIG_FILE")" && umask 077  # vars/ may not exist on a fresh checkout; keep the secrets file owner-only from creation
+    cat > "$CONFIG_FILE" << EOF
+# Generated configuration - $(date)
+# Do not edit manually - regenerated on each run
+
+# QUADS Configuration
+QUADS_API_SERVER=$(printf '%q' "${QUADS_API_SERVER}")
+QUADS_USERNAME=$(printf '%q' "${QUADS_USERNAME}")
+QUADS_USER_DOMAIN=$(printf '%q' "${QUADS_USER_DOMAIN}")
+QUADS_PASSWORD=$(printf '%q' "${QUADS_PASSWORD}")
+
+# Lab Configuration
+LAB=$(printf '%q' "${LAB}")
+
+# OCP Configuration
+OCP_BUILD=$(printf '%q' "${OCP_BUILD}")
+OCP_VERSION=$(printf '%q' "${OCP_VERSION}")
+
+# Cluster Configuration
+CLUSTER_TYPE=$(printf '%q' "${CLUSTER_TYPE}")
+WORKER_NODE_COUNT=$(printf '%q' "${WORKER_NODE_COUNT}")
+NUM_HOSTS=$(printf '%q' "${NUM_HOSTS}")
+DEPLOY_PLAYBOOK=$(printf '%q' "${DEPLOY_PLAYBOOK}")
+
+# Network Configuration
+NETWORK_STACK=$(printf '%q' "${NETWORK_STACK}")
+
+# Paths
+PULL_SECRET_PATH=$(printf '%q' "${PULL_SECRET_PATH}")
+JETLAG_ROOT=$(printf '%q' "${JETLAG_ROOT}")
+
+# Bastion SSH Access
+BASTION_ROOT_PASSWORD=$(printf '%q' "${BASTION_ROOT_PASSWORD}")
+
+# Workload
+WORKLOAD_NAME=$(printf '%q' "${WORKLOAD_NAME}")
+
+# State flags
+USE_EXISTING_ASSIGNMENT=$(printf '%q' "${USE_EXISTING_ASSIGNMENT}")
+EOF
+
+    # Load cloud name from state if using existing assignment
+    if [[ "$USE_EXISTING_ASSIGNMENT" == "true" && -f "$STATE_FILE" ]]; then
+        # shellcheck source=/dev/null
+        source "$STATE_FILE"
+        {
+            echo ""
+            echo "# From existing state"
+            printf 'CLOUD_NAME=%q\n' "${CLOUD_NAME}"
+        } >> "$CONFIG_FILE"
+    fi
+
+    chmod 600 "$CONFIG_FILE"
+    print_info "Configuration saved to $CONFIG_FILE"
+}
+
+# Display summary for QUADS mode and ask for confirmation (exits 1 when declined)
+display_quads_summary() {
+    print_header "Configuration Summary"
+
+    echo "QUADS Server: $QUADS_API_SERVER"
+    echo "QUADS User: $QUADS_USERNAME@$QUADS_USER_DOMAIN"
+    echo "Lab: $LAB"
+    echo "Cluster Type: $CLUSTER_TYPE"
+    echo "Worker Nodes: $WORKER_NODE_COUNT"
+    echo "Hosts to Reserve: $NUM_HOSTS"
+    echo "Workload: $WORKLOAD_NAME"
+
+    if [[ "$USE_EXISTING_ASSIGNMENT" == "true" ]]; then
+        echo ""
+        print_warning "Using existing assignment: $CLOUD_NAME"
+    fi
+
+    echo ""
+    read -r -p "Proceed with this configuration? (y/n) [y]: " confirm
+    confirm="${confirm:-y}"
+    if [[ ! "$confirm" =~ ^[Yy] ]]; then
+        print_error "Configuration cancelled."
+        exit 1
+    fi
+}
+
+# Display summary for jetlag mode and ask for confirmation (exits 1 when declined)
+display_jetlag_summary() {
+    print_header "Configuration Summary"
+
+    echo "OCP Build: $OCP_BUILD"
+    echo "OCP Version: $OCP_VERSION"
+    echo "Network Stack: $NETWORK_STACK"
+    echo "Pull Secret: $PULL_SECRET_PATH"
+    if [[ -n "$BASTION_ROOT_PASSWORD" ]]; then
+        echo "Bastion Password: (provided)"
+    else
+        echo "Bastion Password: (not provided - SSH key copy will be skipped)"
+    fi
+
+    echo ""
+    read -r -p "Proceed with this configuration? (y/n) [y]: " confirm
+    confirm="${confirm:-y}"
+    if [[ ! "$confirm" =~ ^[Yy] ]]; then
+        print_error "Configuration cancelled."
+        exit 1
+    fi
+}
+
+# Display full summary and ask for confirmation (exits 1 when declined)
+display_full_summary() {
+    print_header "Configuration Summary"
+
+    echo "QUADS Server: $QUADS_API_SERVER"
+    echo "QUADS User: $QUADS_USERNAME@$QUADS_USER_DOMAIN"
+    echo "Lab: $LAB"
+    echo "OCP Build: $OCP_BUILD"
+    echo "OCP Version: $OCP_VERSION"
+    echo "Cluster Type: $CLUSTER_TYPE"
+    echo "Worker Nodes: $WORKER_NODE_COUNT"
+    echo "Hosts to Reserve: $NUM_HOSTS"
+    echo "Network Stack: $NETWORK_STACK"
+    echo "Pull Secret: $PULL_SECRET_PATH"
+    echo "Workload: $WORKLOAD_NAME"
+    if [[ -n "$BASTION_ROOT_PASSWORD" ]]; then
+        echo "Bastion Password: (provided)"
+    else
+        echo "Bastion Password: (not provided)"
+    fi
+
+    if [[ "$USE_EXISTING_ASSIGNMENT" == "true" ]]; then
+        echo ""
+        print_warning "Using existing assignment: $CLOUD_NAME"
+    fi
+
+    echo ""
+    read -r -p "Proceed with this configuration? (y/n) [y]: " confirm
+    confirm="${confirm:-y}"
+    if [[ ! "$confirm" =~ ^[Yy] ]]; then
+        print_error "Configuration cancelled."
+        exit 1
+    fi
+}
+
+# Main: the saved config is loaded BEFORE check_existing_state so the answer just given is not overwritten by the USE_EXISTING_ASSIGNMENT value stored by the previous run
+main() {
+    case "$MODE" in
+        quads)
+            load_existing_config
+            check_existing_state
+            collect_quads_config
+            display_quads_summary
+            save_config
+            ;;
+        jetlag)
+            load_existing_config
+            collect_jetlag_config
+            display_jetlag_summary
+            save_config
+            ;;
+        all|*)
+            load_existing_config
+            check_existing_state
+            collect_quads_config
+            collect_jetlag_config
+            display_full_summary
+            save_config
+            ;;
+    esac
+    print_info "Configuration complete!"
+}
+
+main "$@"
diff --git a/scripts/self-sched-deploy/templates/quads_config.yml.j2 b/scripts/self-sched-deploy/templates/quads_config.yml.j2
new file mode 100644
index 00000000..e410d99d
--- /dev/null
+++ b/scripts/self-sched-deploy/templates/quads_config.yml.j2
@@ -0,0 +1,15 @@
+---
+# QUADS Server Configuration
+# Generated by jetlag-self-sched
+
+# QUADS API Server
+quads_api_server: "${QUADS_API_SERVER}"
+
+# User Details
+quads_username: "${QUADS_USERNAME}"
+quads_user_domain: "${QUADS_USER_DOMAIN}"
+quads_password: "${QUADS_PASSWORD}"
+
+# Host Selection Preferences
+# Set to "all" to accept any available model
+preferred_models: "all"
From ecdf39e130bb8058ff39f8693cfc88ac2e934d2b Mon Sep 17 00:00:00 2001
From: Marius Cornea
Date: Mon, 19 Jan 2026 12:58:26 +0200
Subject: [PATCH 2/3] Retrieve quads api server from ansible/vars/lab.yml

---
 bootstrap.sh | 1 +
 scripts/self-sched-deploy/check-r630.sh | 29 +++++++++++-----------
 scripts/self-sched-deploy/prompt-config.sh | 21 +++++++++-------
 3 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/bootstrap.sh b/bootstrap.sh
index 67490887..cbb2f298 100755
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -4,4 +4,5 @@ source .ansible/bin/activate
 pip3 install -q --upgrade pip
 pip3 install -q 'ansible<12.0.0' netaddr
 pip3 install -q jmespath --force
+pip3 install -q yq
 ansible-galaxy collection install ansible.utils --force
diff --git a/scripts/self-sched-deploy/check-r630.sh b/scripts/self-sched-deploy/check-r630.sh
index 5f16335d..5d252424 100755
---
a/scripts/self-sched-deploy/check-r630.sh +++ b/scripts/self-sched-deploy/check-r630.sh @@ -7,6 +7,14 @@ set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +JETLAG_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" + +# Activate venv to access yq +if [[ -f "${JETLAG_ROOT}/.ansible/bin/activate" ]]; then + source "${JETLAG_ROOT}/.ansible/bin/activate" +fi + LAB="${1:-scalelab}" CLOUD_NAME="${2}" QUADS_SERVER="${3}" @@ -22,23 +30,16 @@ if ! command -v jq &> /dev/null; then exit 2 fi -# Use provided QUADS server or map from lab +# Use provided QUADS server or map from lab (from ansible/vars/lab.yml) if [[ -n "$QUADS_SERVER" ]]; then QUADS_HOST="$QUADS_SERVER" else - # Map lab to QUADS server (from jetlag ansible/vars/lab.yml) - case "$LAB" in - scalelab) - QUADS_HOST="quads2.rdu2.scalelab.redhat.com" - ;; - performancelab) - QUADS_HOST="quads2.rdu3.labs.perfscale.redhat.com" - ;; - *) - echo "Unknown lab: $LAB" >&2 - exit 2 - ;; - esac + LAB_YML="${JETLAG_ROOT}/ansible/vars/lab.yml" + QUADS_HOST=$(yq -r ".labs.${LAB}.quads" "$LAB_YML") + if [[ -z "$QUADS_HOST" || "$QUADS_HOST" == "null" ]]; then + echo "Error: Could not find QUADS server for lab '$LAB' in $LAB_YML" >&2 + exit 2 + fi fi # Download ocpinventory.json diff --git a/scripts/self-sched-deploy/prompt-config.sh b/scripts/self-sched-deploy/prompt-config.sh index 0d4faba2..b37e3c4e 100755 --- a/scripts/self-sched-deploy/prompt-config.sh +++ b/scripts/self-sched-deploy/prompt-config.sh @@ -12,6 +12,11 @@ set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" JETLAG_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" CONFIG_FILE="${SCRIPT_DIR}/vars/config.env" + +# Activate venv to access yq +if [[ -f "${JETLAG_ROOT}/.ansible/bin/activate" ]]; then + source "${JETLAG_ROOT}/.ansible/bin/activate" +fi STATE_FILE="${SCRIPT_DIR}/vars/state.env" # Mode parameter @@ -108,15 +113,13 @@ collect_quads_config() { prompt_with_options "Select Lab" "scalelab, performancelab" "${LAB:-scalelab}" LAB - # Auto-configure QUADS API server based on lab - case "$LAB" in - scalelab) - QUADS_API_SERVER="quads2.rdu2.scalelab.redhat.com" - ;; - performancelab) - QUADS_API_SERVER="quads2.rdu3.labs.perfscale.redhat.com" - ;; - esac + # Auto-configure QUADS API server based on lab (from ansible/vars/lab.yml) + LAB_YML="${JETLAG_ROOT}/ansible/vars/lab.yml" + QUADS_API_SERVER=$(yq -r ".labs.${LAB}.quads" "$LAB_YML") + if [[ -z "$QUADS_API_SERVER" || "$QUADS_API_SERVER" == "null" ]]; then + print_error "Error: Could not find QUADS server for lab '$LAB' in $LAB_YML" + exit 1 + fi print_info "QUADS Server: $QUADS_API_SERVER" print_header "QUADS Credentials" From 463ae7e59f3b8e781def2d61caf208ef3229ff31 Mon Sep 17 00:00:00 2001 From: Marius Cornea Date: Fri, 13 Feb 2026 14:13:15 +0200 Subject: [PATCH 3/3] Update inventory/kubeconfig path and inventory file retries --- scripts/self-sched-deploy/Makefile | 15 ++++++++++----- scripts/self-sched-deploy/README.md | 5 ++++- scripts/self-sched-deploy/check-r630.sh | 22 ++++++++++++++++------ scripts/self-sched-deploy/prompt-config.sh | 1 + 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/scripts/self-sched-deploy/Makefile b/scripts/self-sched-deploy/Makefile index 2375320f..0d0e690a 100644 --- a/scripts/self-sched-deploy/Makefile +++ b/scripts/self-sched-deploy/Makefile @@ -38,10 +38,7 @@ deploy: configure bootstrap repos create-assignment-run inventory-run bastion-ru @if [ -f "$(STATE_FILE)" ]; then \ source $(STATE_FILE) && \ echo "Cloud: $$CLOUD_NAME" && \ - echo "Cluster config: $(JETLAG_ROOT)/ansible/$$CLOUD_NAME" && \ - echo "" && \ - 
echo "Access your cluster:" && \ - echo "export KUBECONFIG=$(JETLAG_ROOT)/ansible/kubeconfig"; \ + echo "Inventory: $(JETLAG_ROOT)/ansible/inventory/$$CLOUD_NAME.local"; \ fi # Configure: prompt for all parameters upfront @@ -276,8 +273,16 @@ cluster-run: $(ACTIVATE_VENV) && ansible-playbook -i ansible/inventory/$$CLOUD_NAME.local ansible/$$DEPLOY_PLAYBOOK; \ echo -e "$(GREEN)Cluster deployment complete!$(NC)"; \ echo ""; \ + BASTION_HOST=$$(grep -v '^#' $(JETLAG_ROOT)/ansible/inventory/$$CLOUD_NAME.local | grep -A1 '^\[bastion\]' | tail -1 | awk '{print $$1}'); \ + if [ "$$CLUSTER_TYPE" = "sno" ]; then \ + SNO_NAME=$$(grep -v '^#' $(JETLAG_ROOT)/ansible/inventory/$$CLOUD_NAME.local | grep -A1 '^\[sno\]' | tail -1 | awk '{print $$1}'); \ + KUBECONFIG_PATH="/root/sno/$$SNO_NAME/kubeconfig"; \ + else \ + KUBECONFIG_PATH="/root/$$CLUSTER_TYPE/kubeconfig"; \ + fi; \ echo "Access your cluster:"; \ - echo "export KUBECONFIG=$(JETLAG_ROOT)/ansible/kubeconfig" + echo " ssh root@$$BASTION_HOST"; \ + echo " export KUBECONFIG=$$KUBECONFIG_PATH" #------------------------------------------------------------------------------ # Utility targets diff --git a/scripts/self-sched-deploy/README.md b/scripts/self-sched-deploy/README.md index 7299bd60..08b1db42 100644 --- a/scripts/self-sched-deploy/README.md +++ b/scripts/self-sched-deploy/README.md @@ -8,7 +8,7 @@ This tool provides an easy way to deploy an OpenShift cluster on Red Hat labs (S ## Prerequisites -- **Ansible**: Version 2.9+ +- **Jetlag environment**: Run `source bootstrap.sh` from the jetlag repo root to setup the virtual environment - **GNU Make**: For running the Makefile - **jq**: For JSON parsing (r630 server detection) - **Pull Secret**: Download from [console.redhat.com](https://console.redhat.com/openshift/install/pull-secret) @@ -18,6 +18,9 @@ This tool provides an easy way to deploy an OpenShift cluster on Red Hat labs (S ## Quick Start ```bash +# Bootstrap jetlag environment (from repo root) +source 
bootstrap.sh + # Navigate to the self-sched-deploy directory cd scripts/self-sched-deploy diff --git a/scripts/self-sched-deploy/check-r630.sh b/scripts/self-sched-deploy/check-r630.sh index 5d252424..9cbaaca4 100755 --- a/scripts/self-sched-deploy/check-r630.sh +++ b/scripts/self-sched-deploy/check-r630.sh @@ -42,14 +42,24 @@ else fi fi -# Download ocpinventory.json +# Download ocpinventory.json, retrying until nodes data is available INVENTORY_URL="http://${QUADS_HOST}/instack/${CLOUD_NAME}_ocpinventory.json" -INVENTORY_JSON=$(curl -s "$INVENTORY_URL") +MAX_RETRIES=12 +RETRY_INTERVAL=10 -if [[ -z "$INVENTORY_JSON" || "$INVENTORY_JSON" == "null" ]]; then - echo "Failed to download inventory from $INVENTORY_URL" >&2 - exit 2 -fi +for ((i=1; i<=MAX_RETRIES; i++)); do + INVENTORY_JSON=$(curl -s "$INVENTORY_URL") + if [[ -n "$INVENTORY_JSON" && "$INVENTORY_JSON" != "null" ]] && \ + echo "$INVENTORY_JSON" | jq -e '.nodes | length > 0' &>/dev/null; then + break + fi + if [[ $i -eq $MAX_RETRIES ]]; then + echo "Error: ocpinventory.json not available after $MAX_RETRIES attempts" >&2 + exit 2 + fi + echo "Waiting for ocpinventory.json to be ready... (attempt $i/$MAX_RETRIES)" >&2 + sleep "$RETRY_INTERVAL" +done # Extract server models from pm_addr fields # Pattern: mgmt-[rack]-[unit]-[model].domain → extract [model] diff --git a/scripts/self-sched-deploy/prompt-config.sh b/scripts/self-sched-deploy/prompt-config.sh index b37e3c4e..23f9c31a 100755 --- a/scripts/self-sched-deploy/prompt-config.sh +++ b/scripts/self-sched-deploy/prompt-config.sh @@ -11,6 +11,7 @@ set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" JETLAG_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +mkdir -p "${SCRIPT_DIR}/vars" CONFIG_FILE="${SCRIPT_DIR}/vars/config.env" # Activate venv to access yq