From 240f5420bfe02fb2ec1bcd30c012f73a29b9bb6e Mon Sep 17 00:00:00 2001 From: Srikanth Muppandam Date: Mon, 8 Dec 2025 11:30:46 +0530 Subject: [PATCH 1/3] lib_performance: centralize systemd-analyze wait logic Expose wait_analyze_ready() to poll systemd-analyze time safely. Record systemctl list-jobs whenever boot is not yet finished. Allow configurable timeout and poll interval via environment variables. Prepare shared helpers for reuse across performance KPI test suites. Signed-off-by: Srikanth Muppandam --- Runner/utils/lib_performance.sh | 962 ++++++++++++++++++++++++++++++++ 1 file changed, 962 insertions(+) create mode 100755 Runner/utils/lib_performance.sh diff --git a/Runner/utils/lib_performance.sh b/Runner/utils/lib_performance.sh new file mode 100755 index 00000000..a27a1d91 --- /dev/null +++ b/Runner/utils/lib_performance.sh @@ -0,0 +1,962 @@ +#!/bin/sh +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# SPDX-License-Identifier: BSD-3-Clause-Clear +# Common performance-related helpers for KPI-style tests. +# --------------------------------------------------------------------------- +# Generic timestamp + escaping +# --------------------------------------------------------------------------- + +nowstamp() { + date -u +%Y%m%dT%H%M%SZ 2>/dev/null || date +%s +} + +# Basic JSON string escaper (used by KPI tests) +esc() { + # Escape backslash and double-quote + printf '%s' "$1" \ + | sed 's/\\/\\\\/g;s/"/\\"/g' +} + +# --------------------------------------------------------------------------- +# CPU governor helpers +# --------------------------------------------------------------------------- + +# Put all CPUs into performance governor, saving previous governor for restore. +# Uses SAVED_GOV_FILE (auto set if not provided). 
set_performance_governor() {
    # Environment:
    #   SAVED_GOV_FILE       snapshot file; chosen automatically when unset.
    #   PERF_CPU_SYSFS_ROOT  optional override of the cpufreq sysfs root
    #                        (defaults to /sys/devices/system/cpu).
    # Always returns 0: failing to tune the governor must not fail a test.
    perf_cpu_root="${PERF_CPU_SYSFS_ROOT:-/sys/devices/system/cpu}"

    if [ -z "${SAVED_GOV_FILE:-}" ]; then
        # Prefer an unpredictable name; a fixed "<name>.$$" in /tmp can be
        # pre-created (or symlinked) by another user before we write to it.
        SAVED_GOV_FILE=$(mktemp /tmp/perf_saved_governors.XXXXXX 2>/dev/null) \
            || SAVED_GOV_FILE="/tmp/perf_saved_governors.$$"
    fi

    # A non-empty snapshot means the governors were already switched and not
    # yet restored. Re-recording now would store "performance" as the
    # "original" value, so keep the existing snapshot untouched.
    perf_record_gov=1
    if [ -s "$SAVED_GOV_FILE" ]; then
        perf_record_gov=0
    else
        # printf is a regular utility: a failed redirection just returns
        # non-zero. With the special built-in ':' a strict POSIX shell would
        # exit instead of reaching the '|| return 0' fallback.
        printf '' >"$SAVED_GOV_FILE" 2>/dev/null || return 0
    fi

    for c in "$perf_cpu_root"/cpu[0-9]*; do
        [ -d "$c" ] || continue
        gov_file="$c/cpufreq/scaling_governor"
        [ -f "$gov_file" ] || continue

        if [ "$perf_record_gov" = "1" ]; then
            cur_gov=$(cat "$gov_file" 2>/dev/null || echo "")
            # One "path:governor" record per CPU, consumed by restore_governor.
            printf '%s:%s\n' "$gov_file" "$cur_gov" >>"$SAVED_GOV_FILE" 2>/dev/null || true
        fi

        # Try to set performance, but do not fail the test if it is unsupported.
        echo performance >"$gov_file" 2>/dev/null || true
    done

    if command -v log_info >/dev/null 2>&1; then
        log_info "CPU governors set to performance (saved in $SAVED_GOV_FILE)"
    fi
}

# Restore governors from the snapshot written by set_performance_governor()
# and delete the snapshot afterwards. A missing SAVED_GOV_FILE variable or
# file is a silent no-op. Always returns 0.
restore_governor() {
    [ -n "${SAVED_GOV_FILE:-}" ] || return 0
    [ -f "$SAVED_GOV_FILE" ] || return 0

    # '|| [ -n "$line" ]' keeps a final record that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        [ -n "$line" ] || continue
        gov_file=${line%%:*}
        old_gov=${line#*:}
        [ -f "$gov_file" ] || continue
        [ -n "$old_gov" ] || continue
        echo "$old_gov" >"$gov_file" 2>/dev/null || true
    done <"$SAVED_GOV_FILE"

    rm -f "$SAVED_GOV_FILE" 2>/dev/null || true

    if command -v log_info >/dev/null 2>&1; then
        log_info "Restored original CPU governors from saved state"
    fi
}

# ---------------------------------------------------------------------------
# Clocksource
# ---------------------------------------------------------------------------

# Capture the current clocksource into a text file.
# Usage: capture_clocksource /path/to/file
#   $1  output file (default: ./clocksource.txt)
# Writes "timestamp=..." and "clocksource=..." lines; only logs a warning
# when the kernel does not expose current_clocksource.
capture_clocksource() {
    out_file=${1:-./clocksource.txt}
    perf_cs_node=/sys/devices/system/clocksource/clocksource0/current_clocksource

    if [ ! -r "$perf_cs_node" ]; then
        if command -v log_warn >/dev/null 2>&1; then
            log_warn "current_clocksource not available; skipping clocksource capture"
        fi
        return
    fi

    cs=$(cat "$perf_cs_node" 2>/dev/null || echo "unknown")
    {
        printf '%s\n' "timestamp=$(nowstamp)"
        printf '%s\n' "clocksource=$cs"
    } >"$out_file" 2>/dev/null || true

    if command -v log_info >/dev/null 2>&1; then
        log_info "Clocksource: $cs → $out_file"
    fi
}

# ---------------------------------------------------------------------------
# Boot type tag
# ---------------------------------------------------------------------------

# Capture boot type tag (cold/warm/unknown) into a text file.
# Usage: capture_boot_type TAG OUT_FILE
#   $1  boot type tag (default: unknown)
#   $2  output file   (default: ./boot_type.txt)
capture_boot_type() {
    tag=${1:-unknown}
    out_file=${2:-./boot_type.txt}

    {
        printf '%s\n' "timestamp=$(nowstamp)"
        printf '%s\n' "boot_type=$tag"
    } >"$out_file" 2>/dev/null || true

    if command -v log_info >/dev/null 2>&1; then
        log_info "Boot type tagged as '$tag' → $out_file"
    fi
}

# ---------------------------------------------------------------------------
# System services / “heavy” log producers
# ---------------------------------------------------------------------------

# Internal: disable and stop one unit for the KPI run, ignoring failures.
#   $1  unit name
_perf_disable_unit_for_kpi() {
    systemctl disable "$1" >/dev/null 2>&1 || true
    systemctl stop "$1" >/dev/null 2>&1 || true
    if command -v log_info >/dev/null 2>&1; then
        log_info "Disabled $1 for KPI run"
    fi
}

# Optionally disable heavy services for KPI runs.
# Usage: disable_heavy_services_if_requested DISABLE_GETTY DISABLE_SSHD
# Flags are "1" to disable, anything else to leave alone.
disable_heavy_services_if_requested() {
    disable_getty=$1
    disable_sshd=$2

    if ! command -v systemctl >/dev/null 2>&1; then
        if command -v log_warn >/dev/null 2>&1; then
            log_warn "systemctl not found; cannot apply getty/sshd KPI tweaks"
        fi
        return 0
    fi

    case "$disable_getty" in
        1) _perf_disable_unit_for_kpi "serial-getty@ttyS0.service" ;;
    esac

    case "$disable_sshd" in
        1) _perf_disable_unit_for_kpi "sshd.service" ;;
    esac
}

# ---------------------------------------------------------------------------
# Bootchart
# ---------------------------------------------------------------------------

# Check if systemd-bootchart is enabled via kernel cmdline.
# Returns 0 if init=/lib/systemd/systemd-bootchart is present, grep's
# non-zero status if it is absent, and 1 when /proc/cmdline is unreadable.
bootchart_enabled() {
    [ -r /proc/cmdline ] || return 1
    grep -qw 'init=/lib/systemd/systemd-bootchart' /proc/cmdline 2>/dev/null
}

# ---------------------------------------------------------------------------
# Boot KPI helpers: systemd-analyze time parsing + UEFI loader times + networkd
# ---------------------------------------------------------------------------

# Convert a single duration token like "3.801s", "174ms", "2min" to seconds.
perf_time_token_to_sec() {
    # $1 = one duration token; only the first whitespace-separated word is used.
    # Prints seconds with three decimals, or nothing when the value is zero
    # or the token is not "<number><unit>". An empty argument prints an
    # empty line.
    token="$1"
    [ -n "$token" ] || { echo ""; return 0; }
    _perf_timespan_to_sec "$token" 1
}

# Convert a segment like "2min 7.045s" or "187ms" to seconds.
# Every "<number><unit>" word is summed; any other word is ignored.
perf_time_segment_to_sec() {
    seg="$1"
    [ -n "$seg" ] || { echo ""; return 0; }
    _perf_timespan_to_sec "$seg" 0
}

# Internal: sum the duration tokens of a systemd-style timespan.
#   $1  text such as "1h 2min 3.5s"
#   $2  maximum number of leading words to look at (0 = all)
# Units: us, ms, s, min, h, d. A word counts only when it is exactly
# "<decimal number><unit>", so "500us" is microseconds (not seconds) and a
# unit name such as "admin2.service" contributes nothing.
_perf_timespan_to_sec() {
    printf '%s\n' "$1" | awk -v limit="$2" '
        # Microseconds per unit, or -1 for an unknown unit.
        function unit_usec(u) {
            if (u == "us")  return 1
            if (u == "ms")  return 1000
            if (u == "s")   return 1000000
            if (u == "min") return 60000000
            if (u == "h")   return 3600000000
            if (u == "d")   return 86400000000
            return -1
        }
        {
            last = (limit > 0 && limit < NF) ? limit : NF
            for (i = 1; i <= last; i++) {
                num = $i
                sub(/[a-z]+$/, "", num)
                unit = substr($i, length(num) + 1)
                per = unit_usec(unit)
                if (per < 0 || num !~ /^[0-9]+(\.[0-9]+)?$/) continue
                total_us += num * per
            }
        }
        END {
            if (total_us > 0) {
                printf("%.3f\n", total_us / 1000000)
            }
        }'
}

# Internal: print the microsecond value stored in one loader-timing efivar.
#   $1  efivarfs file (4 attribute bytes followed by the payload)
# The systemd Boot Loader Interface stores these values as a NUL-terminated
# UTF-16 decimal string, so the payload is digits interleaved with NUL bytes.
# Any other payload is decoded as a raw trailing 64-bit integer, which is
# what this helper originally assumed.
_perf_efi_usec() {
    perf_efi_other=$(tail -c +5 "$1" 2>/dev/null | tr -d '\000' | tr -d '0-9' | wc -c | tr -d ' ')
    if [ "$perf_efi_other" = "0" ]; then
        tail -c +5 "$1" 2>/dev/null | tr -cd '0-9'
    else
        tail -c 8 "$1" 2>/dev/null | od -An -t u8 2>/dev/null | awk '{print $1}'
    fi
}

# Read UEFI loader times from efivars (if present)
# Environment:
#   PERF_EFIVARS_DIR  optional override of the efivarfs mount point
#                     (default: /sys/firmware/efi/efivars)
# Sets (empty when the variables are missing or unreadable):
#   PERF_UEFI_INIT_SEC, PERF_UEFI_EXEC_SEC, PERF_UEFI_TOTAL_SEC
perf_read_uefi_loader_times() {
    base="${PERF_EFIVARS_DIR:-/sys/firmware/efi/efivars}"
    init_var="$base/LoaderTimeInitUSec-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f"
    exec_var="$base/LoaderTimeExecUSec-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f"

    PERF_UEFI_INIT_SEC=""
    PERF_UEFI_EXEC_SEC=""
    PERF_UEFI_TOTAL_SEC=""

    if [ -r "$init_var" ] && [ -r "$exec_var" ]; then
        init_us=$(_perf_efi_usec "$init_var")
        exec_us=$(_perf_efi_usec "$exec_var")

        if [ -n "$init_us" ] && [ -n "$exec_us" ]; then
            PERF_UEFI_INIT_SEC=$(printf '%s\n' "$init_us" | awk '{printf("%.3f", $1/1000000)}')
            PERF_UEFI_EXEC_SEC=$(printf '%s\n' "$exec_us" | awk '{printf("%.3f", $1/1000000)}')
            PERF_UEFI_TOTAL_SEC=$(printf '%s %s\n' "$PERF_UEFI_INIT_SEC" "$PERF_UEFI_EXEC_SEC" \
                | awk '{printf("%.3f", $1 + $2)}')
        fi
    fi

    export PERF_UEFI_INIT_SEC PERF_UEFI_EXEC_SEC PERF_UEFI_TOTAL_SEC
}

# Internal: export every variable perf_parse_boot_times() publishes.
_perf_export_boot_times() {
    export PERF_FIRMWARE_SEC PERF_LOADER_SEC PERF_KERNEL_SEC PERF_USERSPACE_SEC PERF_TOTAL_SEC \
        PERF_NETWORKD_WAIT_ONLINE_SEC PERF_EXCLUDED_SERVICES_LIST PERF_EXCLUDED_SERVICES_SEC \
        PERF_EXCLUDED_TOTAL_SEC PERF_USERSPACE_EFFECTIVE_SEC PERF_TOTAL_EFFECTIVE_SEC
}

# Internal: print the timespan that precedes "(<label>)" in a
# "Startup finished in A (x) + B (y) = T" line.
#   $1  the line, $2  phase label (firmware, loader, kernel, userspace, ...)
# Each phase is located by its label, so lines without firmware/loader
# (non-EFI boots) and multi-word spans such as "1min 2.3s" both parse.
_perf_boot_phase_segment() {
    printf '%s\n' "$1" | awk -v want="$2" '
        {
            sub(/^.*Startup finished in /, "")
            sub(/ = .*$/, "")
            n = split($0, part, " [+] ")
            for (i = 1; i <= n; i++) {
                if (match(part[i], / \([a-z]+\)$/) &&
                    substr(part[i], RSTART + 2, RLENGTH - 3) == want) {
                    print substr(part[i], 1, RSTART - 1)
                    exit
                }
            }
        }'
}

# Internal: print the time columns of the first blame line whose unit
# (last column) equals $1 exactly; prints nothing when there is no such line.
#   $1  unit name, $2  systemd-analyze blame output file
# The name is passed through the environment and compared as a string, so
# regex metacharacters and backslashes in unit names are taken literally.
_perf_blame_segment() {
    PERF_BLAME_UNIT="$1" awk '
        $NF == ENVIRON["PERF_BLAME_UNIT"] { $NF = ""; print; exit }
    ' "$2" 2>/dev/null
}

# Parse systemd-analyze time output + blame, and optionally exclude
# systemd-networkd-wait-online.service from userspace/total and an
# arbitrary list of services given via CLI.
#
# Usage:
#   perf_parse_boot_times ANALYZE_TIME_FILE BLAME_FILE EXCLUDE_NETWORKD EXCLUDE_SERVICES
#     $1  file holding `systemd-analyze time` output
#     $2  file holding `systemd-analyze blame` output
#     $3  "1" to subtract systemd-networkd-wait-online.service
#     $4  comma- or space-separated unit names to subtract
#
# Sets (empty when a value could not be parsed):
#   PERF_FIRMWARE_SEC
#   PERF_LOADER_SEC
#   PERF_KERNEL_SEC
#   PERF_USERSPACE_SEC
#   PERF_TOTAL_SEC
#   PERF_NETWORKD_WAIT_ONLINE_SEC
#   PERF_EXCLUDED_SERVICES_LIST
#   PERF_EXCLUDED_SERVICES_SEC
#   PERF_EXCLUDED_TOTAL_SEC (networkd + other excluded services)
#   PERF_USERSPACE_EFFECTIVE_SEC
#   PERF_TOTAL_EFFECTIVE_SEC
# Always returns 0.
perf_parse_boot_times() {
    at_file="$1"
    blame_file="$2"
    exclude_networkd="$3"
    exclude_services_raw="$4"

    PERF_FIRMWARE_SEC=""
    PERF_LOADER_SEC=""
    PERF_KERNEL_SEC=""
    PERF_USERSPACE_SEC=""
    PERF_TOTAL_SEC=""
    PERF_NETWORKD_WAIT_ONLINE_SEC=""
    PERF_EXCLUDED_SERVICES_LIST=""
    PERF_EXCLUDED_SERVICES_SEC=""
    PERF_EXCLUDED_TOTAL_SEC=""
    PERF_USERSPACE_EFFECTIVE_SEC=""
    PERF_TOTAL_EFFECTIVE_SEC=""

    line=""
    if [ -f "$at_file" ]; then
        line=$(grep -m1 'Startup finished in' "$at_file" 2>/dev/null || true)
    fi
    if [ -z "$line" ]; then
        # No finished-boot line: publish the (empty) values and stop.
        _perf_export_boot_times
        return 0
    fi

    PERF_FIRMWARE_SEC=$(perf_time_segment_to_sec "$(_perf_boot_phase_segment "$line" firmware)")
    PERF_LOADER_SEC=$(perf_time_segment_to_sec "$(_perf_boot_phase_segment "$line" loader)")
    PERF_KERNEL_SEC=$(perf_time_segment_to_sec "$(_perf_boot_phase_segment "$line" kernel)")
    PERF_USERSPACE_SEC=$(perf_time_segment_to_sec "$(_perf_boot_phase_segment "$line" userspace)")

    total_seg=$(printf '%s\n' "$line" | sed -n 's/.*= \(.*\)$/\1/p')
    PERF_TOTAL_SEC=$(perf_time_segment_to_sec "$total_seg")

    # --- systemd-networkd-wait-online.service contribution ---
    if [ "$exclude_networkd" = "1" ] && [ -f "$blame_file" ]; then
        net_seg=$(_perf_blame_segment "systemd-networkd-wait-online.service" "$blame_file")
        PERF_NETWORKD_WAIT_ONLINE_SEC=$(perf_time_segment_to_sec "$net_seg")
    fi

    # --- Generic exclude-services list (comma or space separated) ---
    # We only look in blame_file, summing first match for each service.
    EX_SVC_LIST=""
    EX_SVC_TOTAL_SEC=""
    if [ -n "$exclude_services_raw" ] && [ -f "$blame_file" ]; then
        services=$(printf '%s\n' "$exclude_services_raw" | tr ',' ' ')
        # Unquoted on purpose: the list is split on whitespace.
        for svc in $services; do
            [ -n "$svc" ] || continue

            # Avoid double-counting networkd if user also passed it in the list.
            if [ "$exclude_networkd" = "1" ] && [ "$svc" = "systemd-networkd-wait-online.service" ]; then
                continue
            fi

            seg_svc=$(_perf_blame_segment "$svc" "$blame_file")
            [ -n "$seg_svc" ] || continue
            sec_svc=$(perf_time_segment_to_sec "$seg_svc")
            [ -n "$sec_svc" ] || continue

            EX_SVC_LIST="${EX_SVC_LIST:+$EX_SVC_LIST,}$svc"

            if [ -n "$EX_SVC_TOTAL_SEC" ]; then
                EX_SVC_TOTAL_SEC=$(printf '%s %s\n' "$EX_SVC_TOTAL_SEC" "$sec_svc" \
                    | awk '{printf("%.3f", $1 + $2)}')
            else
                EX_SVC_TOTAL_SEC="$sec_svc"
            fi
        done
    fi

    PERF_EXCLUDED_SERVICES_LIST="$EX_SVC_LIST"
    PERF_EXCLUDED_SERVICES_SEC="$EX_SVC_TOTAL_SEC"

    # --- Aggregate excluded total (networkd + generic services) ---
    EXCL_TOTAL=""
    if [ "$exclude_networkd" = "1" ] && [ -n "$PERF_NETWORKD_WAIT_ONLINE_SEC" ]; then
        EXCL_TOTAL="$PERF_NETWORKD_WAIT_ONLINE_SEC"
    fi
    if [ -n "$PERF_EXCLUDED_SERVICES_SEC" ]; then
        if [ -n "$EXCL_TOTAL" ]; then
            EXCL_TOTAL=$(printf '%s %s\n' "$EXCL_TOTAL" "$PERF_EXCLUDED_SERVICES_SEC" \
                | awk '{printf("%.3f", $1 + $2)}')
        else
            EXCL_TOTAL="$PERF_EXCLUDED_SERVICES_SEC"
        fi
    fi
    PERF_EXCLUDED_TOTAL_SEC="$EXCL_TOTAL"

    # Effective values default to the raw ones; the exclusions are subtracted
    # (clamped at zero) only when every operand is known.
    PERF_USERSPACE_EFFECTIVE_SEC="$PERF_USERSPACE_SEC"
    PERF_TOTAL_EFFECTIVE_SEC="$PERF_TOTAL_SEC"

    if [ -n "$EXCL_TOTAL" ] && [ -n "$PERF_USERSPACE_SEC" ] && [ -n "$PERF_TOTAL_SEC" ]; then
        PERF_USERSPACE_EFFECTIVE_SEC=$(printf '%s %s\n' "$PERF_USERSPACE_SEC" "$EXCL_TOTAL" \
            | awk '{d = $1 - $2; if (d < 0) d = 0; printf("%.3f\n", d)}')
        PERF_TOTAL_EFFECTIVE_SEC=$(printf '%s %s\n' "$PERF_TOTAL_SEC" "$EXCL_TOTAL" \
            | awk '{d = $1 - $2; if (d < 0) d = 0; printf("%.3f\n", d)}')
    fi

    _perf_export_boot_times
}

# ---------------------------------------------------------------------------
# Boot-complete detection (multi-user.target)
# ---------------------------------------------------------------------------

# Wait for multi-user.target up to TIMEOUT seconds.
# Usage: wait_for_multi_user_target TIMEOUT
#   $1  whole seconds to wait; empty or non-numeric means "check once"
# Never fails the caller: when the target is still inactive after the
# window, a warning is logged and KPI collection continues.
wait_for_multi_user_target() {
    timeout="$1"
    case "$timeout" in
        ''|*[!0-9]*) timeout=0 ;;
    esac

    if ! command -v systemctl >/dev/null 2>&1; then
        if command -v log_warn >/dev/null 2>&1; then
            log_warn "systemctl not found; cannot verify multi-user.target boot-complete state"
        fi
        return 0
    fi

    i=0
    while [ "$i" -lt "$timeout" ]; do
        if systemctl is-active --quiet multi-user.target; then
            if command -v log_info >/dev/null 2>&1; then
                log_info "Boot complete: multi-user.target is active"
            fi
            return 0
        fi
        sleep 1
        i=$((i+1))
    done

    # One last look after the polling window has elapsed.
    if systemctl is-active --quiet multi-user.target; then
        if command -v log_info >/dev/null 2>&1; then
            log_info "Boot complete: multi-user.target became active after timeout window"
        fi
    else
        if command -v log_warn >/dev/null 2>&1; then
            log_warn "multi-user.target not active after ${timeout}s; continuing KPI collection anyway"
        fi
    fi
}

# ---------------------------------------------------------------------------
# Boot KPI loop helpers: state + systemd hook + KPI CSV / averages
# ---------------------------------------------------------------------------

# Internal helper for safe double-quote escaping.
# The result is placed between double quotes in a file that is later read
# with '.', where the shell still expands $ and backquotes; those are
# escaped together with backslash and the double quote itself.
_perf_kpi_escape_dq() {
    printf '%s' "$1" | sed 's/[\\"$`]/\\&/g'
}

# Write/refresh KPI loop state file.
# Usage:
#   perf_kpi_write_loop_state STATE_FILE ITER_TOTAL ITER_DONE BOOT_TYPE \
#       DISABLE_GETTY DISABLE_SSHD EXCLUDE_NETWORKD EXCLUDE_SERVICES \
#       KPI_SCRIPT KPI_OUT_DIR
# The file is a list of KPI_LOOP_* shell assignments meant to be read back by
# perf_kpi_load_loop_state(). Free-text values are double-quoted after
# passing through _perf_kpi_escape_dq; counters and flags are written as-is.
# Write failures are ignored (best effort).
perf_kpi_write_loop_state() {
    state_file=$1
    iter_total=$2
    iter_done=$3
    boot_type=$4
    disable_getty=$5
    disable_sshd=$6
    exclude_networkd=$7
    exclude_services=$8
    kpi_script=$9
    kpi_out_dir=${10}

    dir=$(dirname "$state_file")
    mkdir -p "$dir" 2>/dev/null || true

    # printf '%s' instead of echo: an XSI-style echo (e.g. dash) interprets
    # backslashes in its arguments and would undo the escaping just applied.
    {
        printf 'KPI_LOOP_ITERATIONS_TOTAL=%s\n' "$iter_total"
        printf 'KPI_LOOP_ITERATIONS_DONE=%s\n' "$iter_done"
        printf 'KPI_LOOP_BOOT_TYPE="%s"\n' "$(_perf_kpi_escape_dq "$boot_type")"
        printf 'KPI_LOOP_DISABLE_GETTY=%s\n' "$disable_getty"
        printf 'KPI_LOOP_DISABLE_SSHD=%s\n' "$disable_sshd"
        printf 'KPI_LOOP_EXCLUDE_NETWORKD=%s\n' "$exclude_networkd"
        printf 'KPI_LOOP_EXCLUDE_SERVICES="%s"\n' "$(_perf_kpi_escape_dq "$exclude_services")"
        printf 'KPI_LOOP_KPI_SCRIPT="%s"\n' "$(_perf_kpi_escape_dq "$kpi_script")"
        printf 'KPI_LOOP_KPI_OUT_DIR="%s"\n' "$(_perf_kpi_escape_dq "$kpi_out_dir")"
    } >"$state_file" 2>/dev/null || true

    if command -v log_info >/dev/null 2>&1; then
        log_info "KPI loop state written to $state_file (done=$iter_done, total=$iter_total)"
    fi
}

# Load KPI loop state; exports KPI_LOOP_* vars if present.
# Returns 0 on success, 1 on missing file.
perf_kpi_load_loop_state() {
    state_file=$1
    if [ ! -f "$state_file" ]; then
        return 1
    fi

    # '.' looks a slash-less name up in PATH rather than the current
    # directory, so anchor bare file names explicitly.
    case "$state_file" in
        */*) perf_state_path=$state_file ;;
        *)   perf_state_path=./$state_file ;;
    esac

    # shellcheck disable=SC1090
    . "$perf_state_path"

    export KPI_LOOP_ITERATIONS_TOTAL KPI_LOOP_ITERATIONS_DONE KPI_LOOP_BOOT_TYPE \
        KPI_LOOP_DISABLE_GETTY KPI_LOOP_DISABLE_SSHD KPI_LOOP_EXCLUDE_NETWORKD \
        KPI_LOOP_EXCLUDE_SERVICES KPI_LOOP_KPI_SCRIPT KPI_LOOP_KPI_OUT_DIR

    if command -v log_info >/dev/null 2>&1; then
        log_info "Loaded KPI loop state from $state_file (done=${KPI_LOOP_ITERATIONS_DONE:-0}, total=${KPI_LOOP_ITERATIONS_TOTAL:-1})"
    fi
    return 0
}

# Install a systemd hook to run the KPI loop script at each boot.
# Usage:
#   perf_install_kpi_systemd_hook /full/path/to/run.sh SERVICE_NAME
#     $1  full path to KPI loop script (e.g. /var/Runner/.../Boot_Systemd_KPI_Loop/run.sh)
#     $2  service base name (e.g. boot-systemd-kpi-loop); a trailing
#         .service/.timer is stripped
# Writes SERVICE_NAME.service and SERVICE_NAME.timer into /etc/systemd/system
# and enables the timer. Returns 1 on missing arguments or when a unit file
# cannot be written, 0 otherwise.
perf_install_kpi_systemd_hook() {
    kpi_script=$1
    svc_name=$2

    if [ -z "$kpi_script" ] || [ -z "$svc_name" ]; then
        _perf_log error "perf_install_kpi_systemd_hook: missing script or service name"
        return 1
    fi

    # Normalise service name (strip accidental .service/.timer)
    case "$svc_name" in
        *.service) svc_name=${svc_name%.service} ;;
        *.timer)   svc_name=${svc_name%.timer} ;;
    esac

    # Resolve script dir; do NOT assume /var – use the real path
    script_dir=$(dirname "$kpi_script")
    unit_dir=/etc/systemd/system

    service_unit="$unit_dir/$svc_name.service"
    timer_unit="$unit_dir/$svc_name.timer"

    _perf_log info "Installing KPI loop systemd units: $service_unit + $timer_unit"

    # NOTE(review): the bodies of both unit files were lost from the text this
    # edit was made against. They are reconstructed from the surrounding
    # comments (oneshot service that just runs the script, started through a
    # timer rather than a WantedBy target). The After= ordering, the /bin/sh
    # launcher, the OnBootSec delay and the PERF_KPI_HOOK_ONBOOT_SEC knob are
    # assumptions — confirm against the upstream file before relying on them.

    # Service: just runs the script once; not bound to WantedBy targets directly
    if ! cat >"$service_unit" <<EOF
[Unit]
Description=Boot KPI loop ($svc_name)
After=multi-user.target

[Service]
Type=oneshot
WorkingDirectory=$script_dir
ExecStart=/bin/sh $kpi_script
EOF
    then
        _perf_log error "perf_install_kpi_systemd_hook: cannot write $service_unit"
        return 1
    fi

    # Timer: starts the service once per boot
    if ! cat >"$timer_unit" <<EOF
[Unit]
Description=Run $svc_name.service after boot

[Timer]
OnBootSec=${PERF_KPI_HOOK_ONBOOT_SEC:-30s}
Unit=$svc_name.service

[Install]
WantedBy=timers.target
EOF
    then
        _perf_log error "perf_install_kpi_systemd_hook: cannot write $timer_unit"
        return 1
    fi

    if command -v systemctl >/dev/null 2>&1; then
        systemctl daemon-reload || true
        systemctl enable --now "$svc_name.timer" || true
    else
        _perf_log warn "systemctl not found, KPI loop units created but not enabled"
    fi

    return 0
}

# Internal: forward to log_info/log_warn/log_error when the test framework
# provides them; stay silent otherwise, like the other helpers in this file.
#   $1  level (info|warn|error), remaining arguments: message
_perf_log() {
    perf_log_fn="log_$1"
    shift
    if command -v "$perf_log_fn" >/dev/null 2>&1; then
        "$perf_log_fn" "$@"
    fi
}

# Remove systemd hook and reload daemon.
# Usage:
#   perf_remove_kpi_systemd_hook SERVICE_NAME
#     $1  service base name (e.g. boot-systemd-kpi-loop or boot-systemd-kpi-loop.service)
# Returns 1 when the name is missing, 0 otherwise.
perf_remove_kpi_systemd_hook() {
    svc_name=$1

    if [ -z "$svc_name" ]; then
        _perf_log error "perf_remove_kpi_systemd_hook: missing service name"
        return 1
    fi

    case "$svc_name" in
        *.service) svc_name=${svc_name%.service} ;;
        *.timer)   svc_name=${svc_name%.timer} ;;
    esac

    unit_dir=/etc/systemd/system
    service_unit="$unit_dir/$svc_name.service"
    timer_unit="$unit_dir/$svc_name.timer"

    _perf_log info "Removing KPI loop systemd units: $service_unit + $timer_unit"

    if command -v systemctl >/dev/null 2>&1; then
        systemctl disable --now "$svc_name.timer" 2>/dev/null || true
        # Service is oneshot; usually inactive, but disable anyway in case it was enabled manually
        systemctl disable "$svc_name.service" 2>/dev/null || true
    fi

    rm -f "$timer_unit" "$service_unit" 2>/dev/null || true

    if command -v systemctl >/dev/null 2>&1; then
        systemctl daemon-reload || true
    fi

    return 0
}

# Wait for systemd-analyze time to report a finished boot
#   $1 = analyze_time.txt path
#   $2 = list_jobs_when_boot_unfinished.txt path
#   $3 = max wait seconds (optional, default 180)
#   $4 = poll interval seconds (optional, default 5)
# Returns:
#   0  boot finished; $1 holds the systemd-analyze time output
#   1  boot still unfinished after the wait budget (caller treats the KPI as
#      degraded but does not fail the test)
#   2  systemd-analyze failed for another reason; details are in $1
wait_analyze_ready() {
    out_file=$1
    jobs_file=$2
    max_wait=${3:-180}
    interval=${4:-5}

    # Fallbacks if someone passes empty or non-numeric values. A zero
    # interval would never advance the elapsed time, so it is replaced too.
    case "$max_wait" in
        ''|*[!0-9]*) max_wait=180 ;;
    esac
    case "$interval" in
        ''|*[!0-9]*) interval=5 ;;
    esac
    [ "$interval" -gt 0 ] || interval=5

    elapsed=0

    while :; do
        # Capture stdout + stderr and remember the exit status.
        rc=0
        systemd-analyze time >"$out_file" 2>&1 || rc=$?

        # systemd-analyze exits non-zero while boot is still in progress, so
        # the "not yet finished" message has to be checked before the exit
        # status; otherwise the very first poll looks like a hard failure.
        if grep -q "Bootup is not yet finished" "$out_file" 2>/dev/null; then
            _perf_log warn "systemd-analyze: boot not finished yet (elapsed=${elapsed}s); capturing systemctl list-jobs → $jobs_file"
            systemctl list-jobs >"$jobs_file" 2>&1 || true

            if [ "$elapsed" -ge "$max_wait" ]; then
                _perf_log warn "systemd-analyze: boot STILL not finished after ${elapsed}s; keeping analyze_time.txt as-is (KPI times may be 'unknown')."
                return 1
            fi
        elif [ "$rc" -eq 0 ]; then
            # We got a proper finished-boot line
            _perf_log info "systemd-analyze: boot finished; analyze_time.txt captured after ${elapsed}s."
            return 0
        else
            # Do not retry endlessly on hard failure, just return special code
            _perf_log warn "systemd-analyze time failed with rc=$rc; see $out_file for details."
            return 2
        fi

        # Boot is not done yet, but we still have budget; sleep and retry
        sleep "$interval" || break
        elapsed=$((elapsed + interval))
    done

    # If we somehow break the loop without a clear result, treat as not finished
    _perf_log warn "systemd-analyze: exited wait loop without finished-boot output; see $out_file / $jobs_file."
    return 1
}

# ---------------------------------------------------------------------------
# KPI file parsing + CSV append + averaging
# ---------------------------------------------------------------------------

# Get the value of a "key : value" line from a KPI text file
#   e.g. " boot_type : cold" → "cold"
#   $1  key (used inside a sed pattern, so plain identifiers only)
#   $2  KPI text file
# Indentation and the spacing around ':' may vary; only the first matching
# line is returned. Prints nothing when the key or the file is missing.
kpi_get_line_val() {
    key=$1
    file=$2
    sed -n "s/^[[:space:]]*${key}[[:space:]]*:[[:space:]]*//p" "$file" 2>/dev/null | head -n 1
}

# Extract first numeric token from a KPI line
# e.g. " boot_total_sec : 137.008" → "137.008"
#      " uefi_time_sec : 438093.283 (Init=..., Exec=...)" → "438093.283"
kpi_get_num_from_line() {
    key=$1
    file=$2
    val=$(kpi_get_line_val "$key" "$file")
    printf '%s\n' "$val" | awk '{print $1}'
}

# Extract metrics from boot_kpi_this_run.txt into PERF_KPI_* env vars
# Usage: perf_kpi_extract_from_file /path/to/boot_kpi_this_run.txt
# A key that is absent from the file yields an empty variable.
perf_kpi_extract_from_file() {
    file=$1

    # Free-text fields: keep the whole value.
    PERF_KPI_BOOT_TYPE=$(kpi_get_line_val "boot_type" "$file")
    PERF_KPI_ITERATIONS_HINT=$(kpi_get_line_val "iterations" "$file")
    PERF_KPI_CLOCKSOURCE=$(kpi_get_line_val "clocksource" "$file")

    # Timings: keep only the leading number, dropping any trailing annotation.
    PERF_KPI_UEFI_TIME_SEC=$(kpi_get_num_from_line "uefi_time_sec" "$file")
    PERF_KPI_FIRMWARE_SEC=$(kpi_get_num_from_line "firmware_time_sec" "$file")
    PERF_KPI_BOOTLOADER_SEC=$(kpi_get_num_from_line "bootloader_time_sec" "$file")
    PERF_KPI_KERNEL_SEC=$(kpi_get_num_from_line "kernel_time_sec" "$file")
    PERF_KPI_USERSPACE_SEC=$(kpi_get_num_from_line "userspace_time_sec" "$file")
    PERF_KPI_USERSPACE_EFFECTIVE_SEC=$(kpi_get_num_from_line "userspace_effective_time_sec" "$file")
    PERF_KPI_BOOT_TOTAL_SEC=$(kpi_get_num_from_line "boot_total_sec" "$file")
    PERF_KPI_BOOT_TOTAL_EFFECTIVE_SEC=$(kpi_get_num_from_line "boot_total_effective_sec" "$file")

    export PERF_KPI_BOOT_TYPE PERF_KPI_ITERATIONS_HINT PERF_KPI_CLOCKSOURCE \
        PERF_KPI_UEFI_TIME_SEC PERF_KPI_FIRMWARE_SEC PERF_KPI_BOOTLOADER_SEC \
        PERF_KPI_KERNEL_SEC PERF_KPI_USERSPACE_SEC PERF_KPI_USERSPACE_EFFECTIVE_SEC \
        PERF_KPI_BOOT_TOTAL_SEC PERF_KPI_BOOT_TOTAL_EFFECTIVE_SEC
}

# Append a CSV row using PERF_KPI_* vars.
# Optionally override boot_type via 2nd arg.
# Usage:
#   perf_kpi_append_csv_row CSV_PATH [boot_type_override]
# The header row is written first when the CSV does not exist yet; the
# timestamp column comes from nowstamp().
perf_kpi_append_csv_row() {
    csv=$1
    override_bt=$2

    bt=${override_bt:-$PERF_KPI_BOOT_TYPE}

    if [ ! -f "$csv" ]; then
        echo "timestamp,boot_type,iterations_hint,clocksource,uefi_time_sec,firmware_time_sec,bootloader_time_sec,kernel_time_sec,userspace_time_sec,userspace_effective_time_sec,boot_total_sec,boot_total_effective_sec" >"$csv"
    fi

    ts=$(nowstamp)
    echo "$ts,$bt,$PERF_KPI_ITERATIONS_HINT,$PERF_KPI_CLOCKSOURCE,$PERF_KPI_UEFI_TIME_SEC,$PERF_KPI_FIRMWARE_SEC,$PERF_KPI_BOOTLOADER_SEC,$PERF_KPI_KERNEL_SEC,$PERF_KPI_USERSPACE_SEC,$PERF_KPI_USERSPACE_EFFECTIVE_SEC,$PERF_KPI_BOOT_TOTAL_SEC,$PERF_KPI_BOOT_TOTAL_EFFECTIVE_SEC" >>"$csv" 2>/dev/null || true

    _perf_log info "Appended KPI row to $csv (boot_type=$bt, total_sec=${PERF_KPI_BOOT_TOTAL_SEC:-unknown}, total_eff_sec=${PERF_KPI_BOOT_TOTAL_EFFECTIVE_SEC:-unknown})"
}

# Compute averages for last N rows of a given boot_type into summary_file.
# Usage:
#   perf_kpi_compute_average CSV_PATH BOOT_TYPE WINDOW SUMMARY_FILE
# Each metric is averaged over the rows that hold a plain decimal number in
# its column; a metric with no numeric sample is left out of the summary.
# Returns 1 when the CSV is missing or no row matches, 0 otherwise.
perf_kpi_compute_average() {
    csv=$1
    bt=$2
    window=$3
    summary_file=$4

    if [ ! -f "$csv" ]; then
        _perf_log warn "perf_kpi_compute_average: CSV not found: $csv"
        return 1
    fi

    # An empty or non-numeric window selects no rows.
    case "$window" in
        ''|*[!0-9]*) window=0 ;;
    esac

    # Rows are filtered in a pipeline instead of scratch files next to the
    # CSV, so a read-only CSV directory no longer looks like "no entries".
    perf_recent_rows=""
    if [ "$window" -gt 0 ]; then
        perf_recent_rows=$(awk -F',' -v bt="$bt" 'NR > 1 && $2 == bt' "$csv" 2>/dev/null \
            | tail -n "$window")
    fi

    if [ -z "$perf_recent_rows" ]; then
        _perf_log warn "perf_kpi_compute_average: no entries for boot_type=$bt"
        return 1
    fi

    # CSV columns 5..12, in header order.
    perf_metric_names="uefi_time_sec firmware_time_sec bootloader_time_sec kernel_time_sec userspace_time_sec userspace_effective_time_sec boot_total_sec boot_total_effective_sec"

    printf '%s\n' "$perf_recent_rows" \
        | awk -F',' -v bt="$bt" -v target="$window" -v out="$summary_file" \
              -v names="$perf_metric_names" '
        BEGIN { split(names, metric, " ") }
        {
            n++
            for (col = 5; col <= 12; col++) {
                if ($col ~ /^[0-9]+(\.[0-9]+)?$/) { sum[col] += $col; cnt[col]++ }
            }
        }
        END {
            if (n == 0) { exit 0 }

            printf("Boot KPI summary (last %d %s boot(s))\n", n, bt) > out
            printf(" entries_used : %d\n", n) > out
            printf(" target_iterations : %d\n", target) > out
            printf(" boot_type : %s\n", bt) > out

            for (col = 5; col <= 12; col++) {
                if (cnt[col] > 0)
                    printf(" avg_%s : %.3f\n", metric[col - 4], sum[col] / cnt[col]) > out
            }
        }'

    if [ -f "$summary_file" ]; then
        _perf_log info "perf_kpi_compute_average: summary written to $summary_file"
    fi
    return 0
}

# ---------------------------------------------------------------------------
# Boot identity + reboot tracking helpers for KPI loops
# ---------------------------------------------------------------------------

# Capture current boot identity:
#   - PERF_KPI_BOOT_ID    = kernel boot_id (or "unknown")
#   - PERF_KPI_UPTIME_SEC = uptime in seconds (float, or empty)
perf_kpi_get_boot_identity() {
    PERF_KPI_BOOT_ID="unknown"
    PERF_KPI_UPTIME_SEC=""

    if [ -r /proc/sys/kernel/random/boot_id ]; then
        PERF_KPI_BOOT_ID=$(cat /proc/sys/kernel/random/boot_id 2>/dev/null || echo "unknown")
    fi

    if [ -r /proc/uptime ]; then
        PERF_KPI_UPTIME_SEC=$(awk '{printf("%.3f\n", $1)}' /proc/uptime 2>/dev/null || echo "")
    fi

    export PERF_KPI_BOOT_ID PERF_KPI_UPTIME_SEC
}

# State file layout:
#   boot_id=...
#   uptime_sec=...
+# pending_reboot=0|1 +# iterations_done=N +perf_kpi_reboot_state_load() { + state_file=$1 + + PERF_KPI_STATE_BOOT_ID="" + PERF_KPI_STATE_UPTIME="" + PERF_KPI_STATE_PENDING="0" + PERF_KPI_STATE_ITER_DONE="" + + if [ -f "$state_file" ]; then + while IFS='=' read -r k v; do + case "$k" in + boot_id) PERF_KPI_STATE_BOOT_ID=$v ;; + uptime_sec) PERF_KPI_STATE_UPTIME=$v ;; + pending_reboot) PERF_KPI_STATE_PENDING=$v ;; + iterations_done) PERF_KPI_STATE_ITER_DONE=$v ;; + esac + done <"$state_file" + fi + + export PERF_KPI_STATE_BOOT_ID PERF_KPI_STATE_UPTIME \ + PERF_KPI_STATE_PENDING PERF_KPI_STATE_ITER_DONE +} + +perf_kpi_reboot_state_save() { + state_file=$1 + boot_id=$2 + uptime=$3 + pending=$4 + iter_done=$5 + + { + echo "boot_id=$boot_id" + echo "uptime_sec=$uptime" + echo "pending_reboot=$pending" + echo "iterations_done=$iter_done" + } >"$state_file" 2>/dev/null || true +} + +# Low-level "request reboot" helper. +# Does *not* manage any state, just tries hard to reboot. +perf_kpi_request_reboot() { + msg=$1 + + if command -v log_info >/dev/null 2>&1; then + log_info "Requesting reboot: $msg" + fi + + sync || true + + if command -v systemctl >/dev/null 2>&1; then + systemctl reboot || reboot || shutdown -r now || : + else + reboot || shutdown -r now || : + fi + + # If we are still alive after a short delay, try once more. + sleep 5 + if command -v systemctl >/dev/null 2>&1; then + systemctl reboot || reboot || shutdown -r now || : + else + reboot || shutdown -r now || : + fi +} + +# At the **start** of KPI loop: +# - Detect whether a previous reboot request actually produced a new boot. +# - If not, immediately re-issue reboot. +# - Logs uptimes for debugging. +perf_kpi_check_previous_reboot() { + state_file=$1 + + perf_kpi_reboot_state_load "$state_file" + perf_kpi_get_boot_identity + + # Nothing pending → nothing to do. 
+ if [ "$PERF_KPI_STATE_PENDING" != "1" ] || [ -z "$PERF_KPI_STATE_BOOT_ID" ]; then + return 0 + fi + + if [ "$PERF_KPI_STATE_BOOT_ID" = "$PERF_KPI_BOOT_ID" ]; then + # Same boot-id as when we asked to reboot → reboot clearly did not happen. + if command -v log_warn >/dev/null 2>&1; then + log_warn "Previous reboot request did NOT change boot-id; re-issuing reboot now." + log_warn "Previous boot_id=$PERF_KPI_STATE_BOOT_ID uptime=${PERF_KPI_STATE_UPTIME:-unknown}s; current uptime=${PERF_KPI_UPTIME_SEC:-unknown}s" + fi + perf_kpi_request_reboot "Retrying failed reboot for KPI loop" + # Should not return if reboot succeeds; if it does, caller will just exit. + return 0 + fi + + # Boot-id changed → reboot successful, just log it and clear pending flag in state. + if command -v log_info >/dev/null 2>&1; then + log_info "Detected new boot after KPI reboot: old_boot_id=$PERF_KPI_STATE_BOOT_ID, new_boot_id=$PERF_KPI_BOOT_ID" + log_info "Previous uptime at reboot request=${PERF_KPI_STATE_UPTIME:-unknown}s, current uptime=${PERF_KPI_UPTIME_SEC:-unknown}s" + fi + + perf_kpi_reboot_state_save "$state_file" "$PERF_KPI_BOOT_ID" "$PERF_KPI_UPTIME_SEC" "0" "$PERF_KPI_STATE_ITER_DONE" +} From 0ca37fb2bfa1f56002e03bbbc623d5c5059daafd Mon Sep 17 00:00:00 2001 From: Srikanth Muppandam Date: Mon, 8 Dec 2025 11:31:23 +0530 Subject: [PATCH 2/3] Boot_Systemd_Validate: use wait_analyze_ready for robust timings - Replace the ad-hoc systemd-analyze polling loop with the shared helper. - Capture list-jobs when boot is unfinished and log this explicitly. - Honor extended boot times on slow platforms before parsing KPIs. - Preserve existing outputs while avoiding misleading 'boot not finished' results. 
Signed-off-by: Srikanth Muppandam --- .../Boot_Systemd_Validate.yaml | 37 ++ .../Systemd_Boot_KPI_Tests_Overview.md | 514 ++++++++++++++++ .../Performance/Boot_Systemd_Validate/run.sh | 564 ++++++++++++++++++ 3 files changed, 1115 insertions(+) create mode 100755 Runner/suites/Performance/Boot_Systemd_Validate/Boot_Systemd_Validate.yaml create mode 100644 Runner/suites/Performance/Boot_Systemd_Validate/Systemd_Boot_KPI_Tests_Overview.md create mode 100755 Runner/suites/Performance/Boot_Systemd_Validate/run.sh diff --git a/Runner/suites/Performance/Boot_Systemd_Validate/Boot_Systemd_Validate.yaml b/Runner/suites/Performance/Boot_Systemd_Validate/Boot_Systemd_Validate.yaml new file mode 100755 index 00000000..40a9052e --- /dev/null +++ b/Runner/suites/Performance/Boot_Systemd_Validate/Boot_Systemd_Validate.yaml @@ -0,0 +1,37 @@ +metadata: + name: boot-systemd-validate + format: "Lava-Test Test Definition 1.0" + description: "Systemd boot KPI capture + required unit gating + artifacts (critical-chain, blame, plot, unit states)." + os: + - linux + scope: + - performance + - functional + +params: + OUT_DIR: "./logs_Boot_Systemd_Validate" + + # Either provide REQUIRED_UNITS_FILE (existing file path) OR REQUIRED_UNITS (list). + # REQUIRED_UNITS supports commas/spaces and will be written to OUT_DIR/required_units.txt automatically. 
+ REQUIRED_UNITS_FILE: "" + REQUIRED_UNITS: "" + + TIMEOUT_PER_UNIT: "30" + SVG: "yes" # yes|no + BOOT_TYPE: "unknown" # cold|warm|unknown etc + DISABLE_GETTY: "0" # 1|0 + DISABLE_SSHD: "0" # 1|0 + EXCLUDE_NETWORKD_WAIT_ONLINE: "0" # 1|0 + EXCLUDE_SERVICES: "" # space-separated service names + BOOT_KPI_ITERATIONS: "1" + VERBOSE: "0" + + # Optional improvement: configurable boot-complete wait (seconds) + WAIT_FOR_BOOT_COMPLETE_TIMEOUT: "300" + +run: + steps: + - REPO_PATH=$PWD + - cd Runner/suites/Performance/Boot_Systemd_Validate/ + - ./run.sh || true + - $REPO_PATH/Runner/utils/send-to-lava.sh Boot_Systemd_Validate.res || true diff --git a/Runner/suites/Performance/Boot_Systemd_Validate/Systemd_Boot_KPI_Tests_Overview.md b/Runner/suites/Performance/Boot_Systemd_Validate/Systemd_Boot_KPI_Tests_Overview.md new file mode 100644 index 00000000..01036f7d --- /dev/null +++ b/Runner/suites/Performance/Boot_Systemd_Validate/Systemd_Boot_KPI_Tests_Overview.md @@ -0,0 +1,514 @@ +Systemd Boot KPI: How to Use the Two Tests +========================================== + +We provide two complementary tests for measuring systemd boot KPIs: + +1. **Per-boot KPI collector** + `Boot_Systemd_Validate/run.sh` +2. **Reboot loop wrapper / KPI aggregator** + `Boot_Systemd_KPI_Loop/run.sh` + +They are designed to work together but serve **different use-cases**. + +Typical paths in qcom-linux-testkit: + +```text +suites/Performance/Boot_Systemd_Validate/run.sh +suites/Performance/Boot_Systemd_KPI_Loop/run.sh +``` + +--- + +1. 
`Boot_Systemd_Validate` – Per-boot KPI collector +--------------------------------------------------- + +**Path (example):** + +```text +suites/Performance/Boot_Systemd_Validate/run.sh +``` + +### Purpose + +Runs **once per boot** and collects detailed systemd boot KPIs: + +- `systemd-analyze time` (parsed into firmware/loader/kernel/userspace/total) +- `systemd-analyze blame` (full + top-20) +- `systemd-analyze critical-chain` +- `systemd-analyze plot` → `boot_analysis.svg` (optional) +- `systemd-analyze dot` → `boot.dot` +- `systemctl` unit dependency trees and per-unit state CSV +- Journals: full boot, warnings, errors (when `journalctl` is available) +- Optional **gating on required units** (e.g. “all critical services must be active”) +- **UEFI loader timings** from efivars (Init/Exec/Total) when EFI vars exist +- **Exclusion of slow services** from userspace/total (e.g. `systemd-networkd-wait-online.service`) + +All logs are stored under a test-local directory: + +```text +./logs_Boot_Systemd_Validate/ +``` + +When `--iterations N` is passed, the script still runs **once**, but includes +this hint in the KPI output so that the KPI loop wrapper knows the intended +window size. + +--- + +### Usage (CLI help) + +The script has a built-in help that matches the implementation: + +```text +Usage: ./run.sh [OPTIONS] + +Options: + --out DIR Output directory for logs (default: ./logs_Boot_Systemd_Validate) + --required FILE File listing systemd units that must become active + --timeout S Timeout per required unit (seconds, default: $TIMEOUT_PER_UNIT) + --no-svg Skip systemd-analyze plot SVG generation + --boot-type TYPE Tag boot type (e.g. cold, warm, unknown) + --disable-getty Disable serial-getty@ttyS0.service for this KPI run + --disable-sshd Disable sshd.service for this KPI run + + --exclude-networkd-wait-online + Exclude systemd-networkd-wait-online.service time + from userspace/total based on systemd-analyze blame + + --exclude-services "svc1 svc2 ..." 
+ Exclude one or more services (matching names in + systemd-analyze blame) from userspace/total. + The summed time is subtracted and reported as + an effective KPI. + + --iterations N Hint for KPI iterations (wrapper/LAVA metadata; this + script still runs once per invocation) + + --verbose Dump key .txt artifacts from OUT_DIR to console for + LAVA debugging (skips large journal_*.txt files) + + -h, --help Show this help and exit +``` + +**Environment knobs (optional):** + +- `TIMEOUT_PER_UNIT` – default per-unit wait time for `--required` +- `SVG=yes|no` – default for SVG generation (overridden by `--no-svg`) +- `BOOT_TYPE` – default boot type tag (overridden by `--boot-type`) +- `BOOT_KPI_ITERATIONS` – default for the `iterations` field in the KPI output + +--- + +### Outputs / Artifacts + +All written under `OUT_DIR` (default: `./logs_Boot_Systemd_Validate`): + +- Platform + metadata + - `platform.txt`, `platform.json` + - `clocksource.txt` (current clocksource) + - `boot_type.txt` (e.g. 
`cold`, `warm`, `unknown`) + +- Units & dependencies + - `sysinit_deps.txt`, `basic_deps.txt` + - `units.list` + - `unit_states.csv` (per-unit state/export from `systemctl show`) + +- Systemd timing & graphs + - `analyze_time.txt` (raw `systemd-analyze time` output) + - `blame.txt`, `blame_top20.txt` + - `critical_chain.txt` + - `boot_analysis.svg` (unless `--no-svg`) + - `boot.dot` + +- Journals + - `journal_boot.txt` – full boot journal + - `journal_warn.txt` – warnings and above + - `journal_err.txt` – errors and above + +- Bootchart (if enabled via `init=/lib/systemd/systemd-bootchart`) + - `bootchart.tgz` (if present under `/run/log/...`) + +- Required units + - `failed_units.txt` (from `systemctl --failed`) + +- **KPI breakdown (this run)** + - `boot_kpi_this_run.txt` – structured, human-readable KPI summary + +--- + +### KPI breakdown: fields and exclusions + +At the end of the run, the script prints a KPI summary **to console** and +writes the same content into `boot_kpi_this_run.txt`, for example: + +```text +Boot KPI (this run) + boot_type : cold + iterations : 5 + clocksource : arch_sys_counter + uefi_time_sec : 438093.283 (Init=214751.707, Exec=223341.576) + firmware_time_sec : 3.765 + bootloader_time_sec : 0.176 + kernel_time_sec : 6.124 + userspace_time_sec : 126.942 + userspace_effective_time_sec : 6.825 + boot_total_sec : 137.008 + boot_total_effective_sec : 16.891 +``` + +Fields: + +- `uefi_time_sec` + Sum of UEFI loader Init+Exec time in seconds, derived from EFI vars: + + - `LoaderTimeInitUSec-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f` + - `LoaderTimeExecUSec-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f` + + with individual Init/Exec components also printed. 
+ +- `firmware_time_sec`, `bootloader_time_sec`, `kernel_time_sec`, + `userspace_time_sec`, `boot_total_sec` + Parsed from `systemd-analyze time`: + + ```text + Startup finished in 3.801s (firmware) + 174ms (loader) + 6.106s (kernel) + 2min 7.045s (userspace) = 2min 17.127s + ``` + +- `userspace_effective_time_sec`, `boot_total_effective_sec` + + These are derived from the raw userspace/total time by subtracting: + + 1. `systemd-networkd-wait-online.service` time when + `--exclude-networkd-wait-online` is passed. + 2. Any additional services given via `--exclude-services "svc1 svc2"`. + +The script logs exclusions clearly, for example: + +```text +[INFO] ... Excluded systemd-networkd-wait-online.service=120.117s from userspace/total; boot_total_effective_sec=16.891 +[INFO] ... Excluded services from userspace/total (sum=2.500s): docker.service=0.966s; NetworkManager.service=1.534s; boot_total_effective_sec=14.391 +``` + +If `systemd-analyze time` reports: + +```text +Bootup is not yet finished (org.freedesktop.systemd1.Manager.FinishTimestampMonotonic=0). +``` + +the script: + +- Marks the timing fields as `unknown`. +- Logs the active jobs from `systemctl list-jobs` to **console** so that + blocking services (including our own KPI service if misconfigured) are + visible during LAVA debugging. + +This diagnostic logging happens **even without `--verbose`**. + +--- + +### Verbose mode (`--verbose`) + +When `--verbose` is set, the script: + +- Prints all “reasonable” `.txt` artifacts from `OUT_DIR` to console + (excluding `journal_*.txt` for size reasons). +- This is intended for LAVA and other CI where you cannot easily inspect the + filesystem but can scroll the job log. + +Example tail of the verbose section: + +```text +[INFO] ... Verbose mode: dumping text artifacts from ./logs_Boot_Systemd_Validate (excluding journal_*.txt) +===== analyze_time.txt ===== +Startup finished in ... +... +===== boot_kpi_this_run.txt ===== +Boot KPI (this run) + ... 
+``` + +--- + +### Typical usage examples + +**1) Basic per-boot KPI with required units** + +```sh +./run.sh --timeout 60 --required required-units.txt +``` + +**2) Cold-boot KPI, excluding networkd-wait-online + Docker/Weston** + +```sh +./run.sh --boot-type cold --disable-getty --exclude-networkd-wait-online --exclude-services "docker.service weston.service" +``` + +**3) LAVA-friendly verbose run** + +```sh +./run.sh --boot-type warm --disable-getty --exclude-networkd-wait-online --iterations 5 --verbose +``` + +In all cases, the main KPI is in `logs_Boot_Systemd_Validate/boot_kpi_this_run.txt` +and echoed to console. + +--- + +2. `Boot_Systemd_KPI_Loop` – Reboot loop wrapper & KPI aggregator +----------------------------------------------------------------- + +**Path (example):** + +```text +suites/Performance/Boot_Systemd_KPI_Loop/run.sh +``` + +### Purpose + +A **thin wrapper** that drives multiple KPI iterations across reboots and +computes averages over the last **N boots** of a given `boot_type`. + +On each (re)boot it: + +1. Loads state from `Boot_Systemd_KPI_Loop.state` (if present) to determine: + - Total iterations requested + - Iterations already completed + - Boot type & options + - KPI script path + base out dir +2. Computes **this iteration index**, and a per-iteration out dir: + + ```text + <kpi-out-dir>/iter_<N> + ``` + +3. Calls `Boot_Systemd_Validate/run.sh` once with: + - `--out <kpi-out-dir>/iter_N` + - `--boot-type <boot_type>` + - `--iterations <N>` + - Forwarded flags (`--disable-getty`, `--exclude-...`, `--verbose`, etc.) +4. Parses `boot_kpi_this_run.txt` for this iteration, appends a row into: + + ```text + Boot_Systemd_KPI_stats.csv + ``` + +5. Computes averages over the last **N entries** for this `boot_type` and writes: + + ```text + Boot_Systemd_KPI_summary.txt + ``` + +6. 
In **auto-reboot mode**, if more iterations are pending: + - Updates `Boot_Systemd_KPI_Loop.state` + - Triggers a reboot + - A small systemd service (`boot-systemd-kpi-loop.service`) invokes this + script again on the next boot until all iterations complete. + +When all iterations finish, the wrapper: + +- Prints the KPI average summary to console. +- Leaves `Boot_Systemd_KPI_stats.csv` and `Boot_Systemd_KPI_summary.txt` for further analysis. +- Cleans up the systemd hook + state file in auto-reboot mode. + +--- + +### Usage (CLI help) + +```text +Usage: ./run.sh [OPTIONS] + +This wrapper: + * Runs Boot_Systemd_Validate once for the *current boot* + * Uses a per-iteration KPI out dir when --iterations > 1: + base: ../Boot_Systemd_Validate/logs_Boot_Systemd_Validate + iter: <base>/iter_<N> + * Parses boot_kpi_this_run.txt from that test + * Appends a row into Boot_Systemd_KPI_stats.csv + * Computes averages over the last N boots (per boot_type) and prints summary. + +Options: + --kpi-script PATH Override Boot_Systemd_Validate script path + (default: ../Boot_Systemd_Validate/run.sh) + + --kpi-out-dir DIR Override base KPI output dir + (default: ../Boot_Systemd_Validate/logs_Boot_Systemd_Validate) + + --iterations N Number of boots to average over (default: 1) + --boot-type TYPE Tag for this run (e.g. cold, warm, unknown) + + # Options forwarded to Boot_Systemd_Validate: + --disable-getty Disable serial-getty@ttyS0.service + --disable-sshd Disable sshd.service + --exclude-networkd-wait-online + Exclude systemd-networkd-wait-online.service + --exclude-services "A B" + Exclude these services from userspace/total + --no-svg Disable SVG plot generation + --verbose Print KPI .txt artifacts to console for debug + + # Auto-reboot orchestration: + --auto-reboot Install systemd hook and auto-reboot until + --iterations boots are collected. 
State is + stored in: Boot_Systemd_KPI_Loop.state + + -h, --help Show this help and exit +``` + +--- + +### Files written by the loop wrapper + +Under the same directory as `Boot_Systemd_KPI_Loop/run.sh`: + +- `Boot_Systemd_KPI_Loop.res` + PASS/FAIL status for the wrapper itself. + +- `Boot_Systemd_KPI_Loop.state` + Persistent state across reboots (total iterations, done so far, boot_type, + options, KPI script path/out dir). Removed automatically when all iterations + complete or on error. + +- `Boot_Systemd_KPI_stats.csv` + Rolling KPI database across boots. Each row corresponds to the parsed + `boot_kpi_this_run.txt` of one boot (for a given `boot_type`). + +- `Boot_Systemd_KPI_summary.txt` + Human-readable summary of averages over the last **N** entries of that + `boot_type`, e.g.: + + ```text + Boot KPI summary (last 5 cold boot(s)) + entries_used : 5 + target_iterations : 5 + boot_type : cold + avg_uefi_time_sec : ... + avg_firmware_time_sec : ... + avg_bootloader_time_sec : ... + avg_kernel_time_sec : ... + avg_userspace_time_sec : ... + avg_userspace_effective_time_sec : ... + avg_boot_total_sec : ... + avg_boot_total_effective_sec : ... + ``` + +- `Boot_Systemd_KPI_Loop_stdout_.log` + Stdout/stderr log(s) for the wrapper itself (if you preserve them). + +Per-iteration artifacts from `Boot_Systemd_Validate` live under: + +```text +../Boot_Systemd_Validate/logs_Boot_Systemd_Validate/iter_1/ +../Boot_Systemd_Validate/logs_Boot_Systemd_Validate/iter_2/ +... +``` + +Each `iter_N` has its own `boot_kpi_this_run.txt`, `analyze_time.txt`, etc. + +--- + +### Auto-reboot mode details + +When `--auto-reboot` is passed: + +- The wrapper installs a small systemd service (e.g. `boot-systemd-kpi-loop.service`) + that runs the wrapper at boot. +- On each boot, the wrapper: + - Runs `Boot_Systemd_Validate` once. + - Updates the `.state` file with the new iteration count. + - If more iterations are required, it requests `reboot` again. 
+- After the final iteration: + - KPI averages are computed and printed. + - The systemd hook is removed. + - The state file is deleted. + +The reboot logic is designed to: + +- Ensure the reboot actually happens (falling back between `reboot` and `/sbin/reboot`). +- Avoid blocking `systemd-analyze` permanently: the KPI scripts finish quickly, + and if any unit (including our own) prevents boot from completing, it will + show up in the “Bootup is not yet finished … list-jobs” diagnostics inside + each `iter_N/analyze_time.txt` and in the **console logs**. + +--- + +### Typical usage examples + +**1) Manual KPI over last 5 cold boots (no auto-reboot)** + +You manually reboot the board between runs: + +```sh +# Boot 1 (cold boot) +./run.sh --iterations 5 --boot-type cold --disable-getty --exclude-networkd-wait-online + +# Reboot the board manually (power-cycle or reboot) + +# Boot 2..5 – re-run the same command each time +./run.sh --iterations 5 --boot-type cold --disable-getty --exclude-networkd-wait-online +... +``` + +After the 5th run, `Boot_Systemd_KPI_summary.txt` will contain the averages over +the last 5 `cold` entries. + +**2) Fully automated cold-boot KPI campaign (auto-reboot)** + +```sh +./run.sh --iterations 5 --boot-type cold --disable-getty --exclude-networkd-wait-online --auto-reboot +``` + +The wrapper will: + +- Run `Boot_Systemd_Validate` on this boot. +- Reboot automatically until 5 iterations are captured. +- Finally, print a KPI summary and clean up the systemd hook/state. + +**3) Warm-boot KPI with extra service exclusions and verbose logs** + +```sh +./run.sh --iterations 3 --boot-type warm --disable-getty --exclude-networkd-wait-online --exclude-services "docker.service weston.service" --auto-reboot --verbose +``` + +This gives: + +- Per-iteration directories: `iter_1`, `iter_2`, `iter_3`. +- Detailed logs printed to console from `Boot_Systemd_Validate` via `--verbose`. +- Aggregated averages in `Boot_Systemd_KPI_summary.txt`. + +--- + +3. 
Which one should I use? +-------------------------- + +| Scenario | Recommended test | Notes | +|----------------------------------------------|---------------------------------------|-----------------------------------------------------------------------| +| Standard CI pipeline (no reboot-resume) | `Boot_Systemd_Validate` | Run once per job; no reboot inside the script. | +| Manual KPI measurement on a single boot | `Boot_Systemd_Validate` | E.g. after changing kernel/systemd configs. | +| Quick health-check of systemd units | `Boot_Systemd_Validate` | Use `--required` to gate on critical services. | +| Lab KPI across N cold/warm boots | `Boot_Systemd_KPI_Loop` | Wrapper handles per-boot dirs + CSV + averages; you may reboot manually. | +| Automated multi-boot campaign in lab | `Boot_Systemd_KPI_Loop` with `--auto-reboot` | State file + systemd hook handle the full loop. | +| CI with explicit reboot-resume support | `Boot_Systemd_KPI_Loop` (if allowed) | CI must re-run the script after each reboot. | + +--- + +4. Design principles +-------------------- + +- **Single responsibility** + - `Boot_Systemd_Validate`: _measure one boot and emit KPIs_. + - `Boot_Systemd_KPI_Loop`: _across boots: state, reboots, aggregation_. + +- **CI friendliness** + - CI that cannot handle reboots should only use `Boot_Systemd_Validate`. + - Reboot orchestration via `--auto-reboot` is explicitly opt-in. + +- **Robust & transparent** + - Rolling CSV + summary for long-term trends. + - Clear console logs for: + - service time exclusions, + - non-finished boots (`Bootup is not yet finished` + `systemctl list-jobs`), + - per-iteration KPI values. + +- **Local logs only** + - All artifacts (CSV, SVG, journals, etc.) are stored under the test’s + working directory, making log collection and LAVA parsing straightforward. 
diff --git a/Runner/suites/Performance/Boot_Systemd_Validate/run.sh b/Runner/suites/Performance/Boot_Systemd_Validate/run.sh new file mode 100755 index 00000000..c7722a93 --- /dev/null +++ b/Runner/suites/Performance/Boot_Systemd_Validate/run.sh @@ -0,0 +1,564 @@ +#!/bin/sh +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# SPDX-License-Identifier: BSD-3-Clause-Clear +# Systemd boot KPI + validation (single run). + +SCRIPT_DIR="$( + cd "$(dirname "$0")" || exit 1 + pwd +)" + +TESTNAME="Boot_Systemd_Validate" +RES_FILE="./${TESTNAME}.res" + +# Defaults (env may override; CLI parsing later overrides again) +OUT_DIR="${OUT_DIR:-./logs_${TESTNAME}}" +REQ_UNITS_FILE="${REQ_UNITS_FILE:-}" +REQUIRED_UNITS="${REQUIRED_UNITS:-}" +TIMEOUT_PER_UNIT="${TIMEOUT_PER_UNIT:-30}" +SVG="${SVG:-yes}" +BOOT_TYPE="${BOOT_TYPE:-unknown}" +DISABLE_GETTY="${DISABLE_GETTY:-0}" +DISABLE_SSHD="${DISABLE_SSHD:-0}" +EXCLUDE_NETWORKD_WAIT_ONLINE="${EXCLUDE_NETWORKD_WAIT_ONLINE:-0}" +EXCLUDE_SERVICES="${EXCLUDE_SERVICES:-}" +BOOT_KPI_ITERATIONS="${BOOT_KPI_ITERATIONS:-1}" +VERBOSE="${VERBOSE:-0}" +BOOT_NOT_FINISHED=0 + +# Optional: make boot-complete wait configurable +WAIT_FOR_BOOT_COMPLETE_TIMEOUT="${WAIT_FOR_BOOT_COMPLETE_TIMEOUT:-300}" + +usage() { + cat <&2 + exit 0 + ;; +esac + +# --- locate and source init_env → functestlib.sh + lib_performance.sh --- +INIT_ENV="" +SEARCH="$SCRIPT_DIR" + +while [ "$SEARCH" != "/" ]; do + if [ -f "$SEARCH/init_env" ]; then + INIT_ENV="$SEARCH/init_env" + break + fi + SEARCH=$(dirname "$SEARCH") +done + +if [ -z "$INIT_ENV" ]; then + echo "[ERROR] Could not find init_env (starting at $SCRIPT_DIR)" >&2 + exit 1 +fi + +# Only source once (idempotent) +# NOTE: We intentionally **do not export** any new vars. They stay local to this shell. +if [ -z "${__INIT_ENV_LOADED:-}" ]; then + # shellcheck disable=SC1090 + . "$INIT_ENV" + __INIT_ENV_LOADED=1 +fi + +# shellcheck disable=SC1090 +. "$INIT_ENV" +# shellcheck disable=SC1091 +. 
"$TOOLS/functestlib.sh" +# shellcheck disable=SC1091 +. "$TOOLS/lib_performance.sh" + +# --- allow LAVA params (env) to drive defaults cleanly --- +# (CLI still overrides these later via parsing) +OUT_DIR="${OUT_DIR:-./logs_${TESTNAME}}" + +# Support either REQUIRED_UNITS_FILE (file path) or REQUIRED_UNITS (list) +REQ_UNITS_FILE="${REQUIRED_UNITS_FILE:-${REQ_UNITS_FILE:-}}" +REQUIRED_UNITS="${REQUIRED_UNITS:-}" + +TIMEOUT_PER_UNIT="${TIMEOUT_PER_UNIT:-30}" +SVG="${SVG:-yes}" +BOOT_TYPE="${BOOT_TYPE:-unknown}" + +DISABLE_GETTY="${DISABLE_GETTY:-0}" +DISABLE_SSHD="${DISABLE_SSHD:-0}" +EXCLUDE_NETWORKD_WAIT_ONLINE="${EXCLUDE_NETWORKD_WAIT_ONLINE:-0}" +EXCLUDE_SERVICES="${EXCLUDE_SERVICES:-}" +BOOT_KPI_ITERATIONS="${BOOT_KPI_ITERATIONS:-1}" +VERBOSE="${VERBOSE:-0}" + +# If REQUIRED_UNITS is provided (space/comma-separated) and no file given, materialize it. +if [ -z "$REQ_UNITS_FILE" ] && [ -n "$REQUIRED_UNITS" ]; then + mkdir -p "$OUT_DIR" 2>/dev/null || true + REQ_UNITS_FILE="$OUT_DIR/required_units.txt" + printf '%s\n' "$REQUIRED_UNITS" | tr ',' ' ' | tr ' ' '\n' | sed '/^$/d' >"$REQ_UNITS_FILE" 2>/dev/null || true +fi + +# --- CLI parsing --- +while [ "$#" -gt 0 ]; do + case "$1" in + --out) shift; OUT_DIR="$1" ;; + --required) shift; REQ_UNITS_FILE="$1" ;; + --timeout) shift; TIMEOUT_PER_UNIT="$1" ;; + --no-svg) SVG="no" ;; + --boot-type) shift; BOOT_TYPE="$1" ;; + --disable-getty) DISABLE_GETTY=1 ;; + --disable-sshd) DISABLE_SSHD=1 ;; + --exclude-networkd-wait-online) EXCLUDE_NETWORKD_WAIT_ONLINE=1 ;; + --exclude-services) shift; EXCLUDE_SERVICES="$1" ;; + --iterations) shift; BOOT_KPI_ITERATIONS="$1" ;; + --verbose) VERBOSE=1 ;; + -h|--help) + usage >&2 + exit 0 + ;; + *) + log_warn "Unknown option: $1" + usage >&2 + echo "$TESTNAME FAIL" >"$RES_FILE" + exit 1 + ;; + esac + shift +done + +mkdir -p "$OUT_DIR" || { + log_error "Cannot create $OUT_DIR" + echo "$TESTNAME FAIL" >"$RES_FILE" + exit 1 +} + +# Basic tools check (keep light; others are optional) 
+check_dependencies systemd-analyze uname sed awk grep find sort || { + log_skip "$TESTNAME SKIP - basic tools missing" + echo "$TESTNAME SKIP" >"$RES_FILE" + exit 0 +} + +# --- ensure CPU governors restored on exit --- +cleanup() { + restore_governor +} +trap cleanup EXIT + +# --- Set performance governor for KPI run --- +set_performance_governor + +# --- Clocksource + boot type tagging --- +capture_clocksource "$OUT_DIR/clocksource.txt" +capture_boot_type "$BOOT_TYPE" "$OUT_DIR/boot_type.txt" + +# --- Optionally disable heavy services (getty/sshd) --- +disable_heavy_services_if_requested "$DISABLE_GETTY" "$DISABLE_SSHD" + +# --- Wait for boot complete (multi-user.target) if possible --- +if command -v wait_for_boot_complete >/dev/null 2>&1; then + wait_for_boot_complete "$WAIT_FOR_BOOT_COMPLETE_TIMEOUT" +else + if command -v systemctl >/dev/null 2>&1; then + if systemctl is-active --quiet multi-user.target; then + log_info "Boot complete: multi-user.target is active" + else + log_warn "multi-user.target not active; continuing KPI capture anyway" + fi + else + log_warn "systemctl not found; cannot verify boot-complete target" + fi +fi + +# ---------- Platform snapshot ---------- +detect_platform + +{ + echo "timestamp=$(nowstamp)" + echo "kernel=$PLATFORM_KERNEL" + echo "arch=$PLATFORM_ARCH" + echo "uname_s=$PLATFORM_UNAME_S" + echo "hostname=$PLATFORM_HOSTNAME" + echo "soc_machine=$PLATFORM_SOC_MACHINE" + echo "soc_id=$PLATFORM_SOC_ID" + echo "soc_family=$PLATFORM_SOC_FAMILY" + echo "dt_model=$PLATFORM_DT_MODEL" + echo "dt_compatible=$PLATFORM_DT_COMPAT" + echo "os_like=$PLATFORM_OS_LIKE" + echo "os_name=$PLATFORM_OS_NAME" + echo "target=$PLATFORM_TARGET" + echo "machine=$PLATFORM_MACHINE" +} >"$OUT_DIR/platform.txt" +log_info "Platform info → $OUT_DIR/platform.txt" + +{ + printf '{' + printf '"timestamp":"%s",' "$(nowstamp)" + printf '"kernel":"%s",' "$(esc "$PLATFORM_KERNEL")" + printf '"arch":"%s",' "$(esc "$PLATFORM_ARCH")" + printf '"uname_s":"%s",' "$(esc 
"$PLATFORM_UNAME_S")" + printf '"hostname":"%s",' "$(esc "$PLATFORM_HOSTNAME")" + printf '"soc_machine":"%s",' "$(esc "$PLATFORM_SOC_MACHINE")" + printf '"soc_id":"%s",' "$(esc "$PLATFORM_SOC_ID")" + printf '"soc_family":"%s",' "$(esc "$PLATFORM_SOC_FAMILY")" + printf '"dt_model":"%s",' "$(esc "$PLATFORM_DT_MODEL")" + printf '"dt_compatible":"%s",' "$(esc "$PLATFORM_DT_COMPAT")" + printf '"os_like":"%s",' "$(esc "$PLATFORM_OS_LIKE")" + printf '"os_name":"%s",' "$(esc "$PLATFORM_OS_NAME")" + printf '"target":"%s",' "$(esc "$PLATFORM_TARGET")" + printf '"machine":"%s"' "$(esc "$PLATFORM_MACHINE")" + printf '}\n' +} >"$OUT_DIR/platform.json" +log_info "Platform JSON → $OUT_DIR/platform.json" + +# ---------- systemd dependency trees ---------- +if command -v systemctl >/dev/null 2>&1; then + systemctl list-dependencies sysinit.target --plain --all >"$OUT_DIR/sysinit_deps.txt" 2>&1 || true + systemctl list-dependencies basic.target --plain --all >"$OUT_DIR/basic_deps.txt" 2>&1 || true +else + log_warn "systemctl not found; skipping dependency trees" +fi + +# ---------- units + states CSV ---------- +units_file="$OUT_DIR/units.list" +: >"$units_file" +if command -v systemctl >/dev/null 2>&1; then + systemctl list-dependencies sysinit.target --plain --all 2>/dev/null \ + | sed '1d' | tr -d '●' | sed 's/^[[:space:]]*//' >>"$units_file" || true + systemctl list-dependencies basic.target --plain --all 2>/dev/null \ + | sed '1d' | tr -d '●' | sed 's/^[[:space:]]*//' >>"$units_file" || true + systemctl list-units --type=service --state=active --no-legend 2>/dev/null \ + | awk '{print $1}' >>"$units_file" || true + sort -u "$units_file" | grep -E '\.(service|target|mount|socket|path|timer)$' >"$units_file.tmp" 2>/dev/null || true + mv -f "$units_file.tmp" "$units_file" 2>/dev/null || true + + csv="$OUT_DIR/unit_states.csv" + echo "unit,active_state,sub_state,load_state,enabled,start_usec,fragment_path,source_path,default_deps" >"$csv" + while IFS= read -r u; do + [ -n "$u" ] || 
continue + show_out="$(systemctl show "$u" \ + -p Id -p ActiveState -p SubState -p LoadState -p UnitFileState \ + -p ActiveEnterTimestampMonotonic -p FragmentPath -p SourcePath -p DefaultDependencies 2>/dev/null || true)" + id=$(printf '%s\n' "$show_out" | sed -n 's/^Id=//p' | head -n 1) + act=$(printf '%s\n' "$show_out" | sed -n 's/^ActiveState=//p' | head -n 1) + sub=$(printf '%s\n' "$show_out" | sed -n 's/^SubState=//p' | head -n 1) + load=$(printf '%s\n' "$show_out" | sed -n 's/^LoadState=//p' | head -n 1) + en=$(printf '%s\n' "$show_out" | sed -n 's/^UnitFileState=//p' | head -n 1) + usec=$(printf '%s\n' "$show_out" | sed -n 's/^ActiveEnterTimestampMonotonic=//p' | head -n 1) + frag=$(printf '%s\n' "$show_out" | sed -n 's/^FragmentPath=//p' | head -n 1) + src=$(printf '%s\n' "$show_out" | sed -n 's/^SourcePath=//p' | head -n 1) + ddef=$(printf '%s\n' "$show_out" | sed -n 's/^DefaultDependencies=//p' | head -n 1) + + id=$(printf '%s' "$id" | tr '"' "'") + act=$(printf '%s' "$act" | tr '"' "'") + sub=$(printf '%s' "$sub" | tr '"' "'") + load=$(printf '%s' "$load" | tr '"' "'") + en=$(printf '%s' "$en" | tr '"' "'") + usec=$(printf '%s' "$usec" | tr '"' "'") + frag=$(printf '%s' "$frag" | tr '"' "'") + src=$(printf '%s' "$src" | tr '"' "'") + ddef=$(printf '%s' "$ddef" | tr '"' "'") + + printf '"%s","%s","%s","%s","%s","%s","%s","%s","%s"\n' \ + "$id" "$act" "$sub" "$load" "$en" "$usec" "$frag" "$src" "$ddef" >>"$csv" + done <"$units_file" + log_info "Wrote unit states CSV → $csv" +else + log_warn "systemctl not found; skipping unit state CSV" +fi + +# ---------- systemd-analyze artifacts ---------- +an_time="$OUT_DIR/analyze_time.txt" +an_blame="$OUT_DIR/blame.txt" +an_blame_top="$OUT_DIR/blame_top20.txt" +an_chain="$OUT_DIR/critical_chain.txt" +jobs_unfinished="$OUT_DIR/list_jobs_when_boot_unfinished.txt" + +BOOT_NOT_FINISHED=0 + +if command -v systemd-analyze >/dev/null 2>&1; then + : >"$jobs_unfinished" + + if command -v wait_analyze_ready >/dev/null 2>&1; 
then + # Preferred path: shared helper from lib_performance.sh + max_wait="${WAIT_ANALYZE_FINISH_TIMEOUT:-240}" # bump default to 240s + interval="${WAIT_ANALYZE_FINISH_INTERVAL:-5}" + + if wait_analyze_ready "$an_time" "$jobs_unfinished" \ + "$max_wait" "$interval"; then + BOOT_NOT_FINISHED=0 + else + BOOT_NOT_FINISHED=1 + log_warn "systemd-analyze did not report finished boot within ${max_wait}s, KPIs may stay 'unknown'. See $an_time and $jobs_unfinished." + fi + else + # Fallback: original inline loop, with larger default timeout + wait_analyze="${WAIT_ANALYZE_FINISH_TIMEOUT:-240}" + i=0 + got_finish=0 + + while [ "$i" -le "$wait_analyze" ]; do + systemd-analyze time >"$an_time" 2>&1 || true + + if grep -q 'Startup finished in' "$an_time" 2>/dev/null; then + got_finish=1 + BOOT_NOT_FINISHED=0 + break + fi + + if grep -q 'Bootup is not yet finished' "$an_time" 2>/dev/null; then + BOOT_NOT_FINISHED=1 + systemctl list-jobs >"$jobs_unfinished" 2>&1 || true + fi + + i=$((i+1)) + sleep 1 + done + + if [ "$got_finish" -eq 1 ]; then + first_line=$(sed -n '1p' "$an_time" 2>/dev/null || true) + if [ -n "$first_line" ]; then + log_info "systemd-analyze time: $first_line" + else + log_info "systemd-analyze time written to $an_time" + fi + else + log_warn "systemd-analyze reports boot not finished even after ${wait_analyze}s KPI breakdown may remain 'unknown'. See $an_time and $jobs_unfinished." 
+ fi + fi + + systemd-analyze critical-chain >"$an_chain" 2>&1 || true + log_info "systemd-analyze critical-chain → $an_chain" + + systemd-analyze blame >"$an_blame" 2>&1 || true + head -n 20 "$an_blame" >"$an_blame_top" 2>/dev/null \ + || cp "$an_blame" "$an_blame_top" 2>/dev/null || true + log_info "Top 20 services by time (systemd-analyze blame) → $an_blame_top" + + if [ "$SVG" = "yes" ]; then + systemd-analyze plot >"$OUT_DIR/boot_analysis.svg" 2>/dev/null || true + log_info "Boot SVG timeline → $OUT_DIR/boot_analysis.svg" + else + log_info "SVG plot disabled via --no-svg" + fi + + systemd-analyze dot >"$OUT_DIR/boot.dot" 2>/dev/null || true + log_info "Boot dependency DOT graph → $OUT_DIR/boot.dot" +else + log_warn "systemd-analyze not found, skipping timing/critical-chain/blame/plot" +fi + +# ---------- Bootchart (optional) ---------- +if bootchart_enabled; then + for p in /run/log/bootchart.tgz /run/log/bootchart/bootchart.tgz; do + if [ -f "$p" ]; then + cp "$p" "$OUT_DIR/bootchart.tgz" 2>/dev/null || true + if [ -f "$OUT_DIR/bootchart.tgz" ]; then + log_info "Bootchart archive → $OUT_DIR/bootchart.tgz" + fi + break + fi + done +else + log_skip "systemd-bootchart not enabled in cmdline; skipping bootchart-specific collection" +fi + +# ---------- Failed units + journal ---------- +if command -v systemctl >/dev/null 2>&1; then + systemctl --failed >"$OUT_DIR/failed_units.txt" 2>&1 || true +fi + +if command -v journalctl >/dev/null 2>&1; then + journalctl -b >"$OUT_DIR/journal_boot.txt" 2>&1 || true + journalctl -b -p warning..alert >"$OUT_DIR/journal_warn.txt" 2>&1 || true + journalctl -b -p err..alert >"$OUT_DIR/journal_err.txt" 2>&1 || true +else + log_warn "journalctl not found; skipping boot journal capture" +fi + +# ---------- required units gating ---------- +suite_rc=0 +if [ -n "$REQ_UNITS_FILE" ]; then + if command -v systemctl >/dev/null 2>&1; then + rc=0 + while IFS= read -r u; do + [ -n "$u" ] || continue + if ! 
systemctl is-active --quiet "$u"; then + log_info "Waiting for $u (up to ${TIMEOUT_PER_UNIT}s)..." + i=0 + while [ "$i" -lt "$TIMEOUT_PER_UNIT" ]; do + systemctl is-active --quiet "$u" && break + sleep 1 + i=$((i+1)) + done + fi + if systemctl is-active --quiet "$u"; then + log_info "[ok] $u is active" + else + log_fail "[fail] $u not active after ${TIMEOUT_PER_UNIT}s" + rc=1 + fi + done <"$REQ_UNITS_FILE" + [ "$rc" -eq 0 ] || suite_rc=1 + else + log_warn "systemctl not found; cannot verify required units" + fi +else + log_warn "No --required file provided; not gating PASS/FAIL on specific units" +fi + +# ---------- KPI breakdown (this run) ---------- +CLOCKSOURCE="unknown" +if [ -f "$OUT_DIR/clocksource.txt" ]; then + CLOCKSOURCE=$( + grep '^clocksource=' "$OUT_DIR/clocksource.txt" 2>/dev/null \ + | sed 's/^clocksource=//' | head -n 1 + ) + [ -n "$CLOCKSOURCE" ] || CLOCKSOURCE="unknown" +fi + +# Read UEFI loader times (efivars) +perf_read_uefi_loader_times +UEFI_INITs="${PERF_UEFI_INIT_SEC:-unknown}" +UEFI_EXECs="${PERF_UEFI_EXEC_SEC:-unknown}" +UEFI_TOTAL="${PERF_UEFI_TOTAL_SEC:-unknown}" + +# Parse systemd-analyze time/blame +FIRMWARE_SEC="" +LOADER_SEC="" +KERNEL_SEC="" +USERSPACE_SEC="" +TOTAL_SEC="" +USERSPACE_EFF="" +TOTAL_EFF="" + +if [ "$BOOT_NOT_FINISHED" -eq 0 ]; then + perf_parse_boot_times "$an_time" "$an_blame" "$EXCLUDE_NETWORKD_WAIT_ONLINE" + + FIRMWARE_SEC="${PERF_FIRMWARE_SEC:-}" + LOADER_SEC="${PERF_LOADER_SEC:-}" + KERNEL_SEC="${PERF_KERNEL_SEC:-}" + USERSPACE_SEC="${PERF_USERSPACE_SEC:-}" + TOTAL_SEC="${PERF_TOTAL_SEC:-}" + + USERSPACE_EFF="${PERF_USERSPACE_EFFECTIVE_SEC:-$USERSPACE_SEC}" + TOTAL_EFF="${PERF_TOTAL_EFFECTIVE_SEC:-$TOTAL_SEC}" +else + log_warn "Boot not finished according to systemd-analyze; leaving KPI time fields as 'unknown'. See $an_time and $jobs_unfinished." 
+fi + +# Extra service exclusions (beyond networkd-wait-online) +EXCL_SVC_SEC="" +EXCL_SVC_DETAIL="" +if [ -n "$EXCLUDE_SERVICES" ] && [ -f "$an_blame" ]; then + sum="0" + detail="" + for svc in $EXCLUDE_SERVICES; do + line=$(grep "[[:space:]]$svc\$" "$an_blame" 2>/dev/null | head -n 1 || true) + [ -n "$line" ] || continue + seg=$(printf '%s\n' "$line" | awk '{NF--; print}') + t=$(perf_time_segment_to_sec "$seg") + [ -n "$t" ] || continue + detail="${detail}${svc}=${t}s; " + sum=$(printf '%s %s\n' "$sum" "$t" | awk '{printf("%.3f\n", $1+$2)}') + done + if [ "$sum" != "0" ]; then + EXCL_SVC_SEC="$sum" + EXCL_SVC_DETAIL="$detail" + if [ -n "$USERSPACE_EFF" ]; then + USERSPACE_EFF=$(printf '%s %s\n' "$USERSPACE_EFF" "$sum" \ + | awk '{d=$1-$2; if (d<0) d=0; printf("%.3f\n", d)}') + fi + if [ -n "$TOTAL_EFF" ]; then + TOTAL_EFF=$(printf '%s %s\n' "$TOTAL_EFF" "$sum" \ + | awk '{d=$1-$2; if (d<0) d=0; printf("%.3f\n", d)}') + fi + fi +fi + +# Log exclusions clearly +if [ "$EXCLUDE_NETWORKD_WAIT_ONLINE" -eq 1 ] && [ -n "${PERF_NETWORKD_WAIT_ONLINE_SEC:-}" ]; then + log_info "Excluded systemd-networkd-wait-online.service=${PERF_NETWORKD_WAIT_ONLINE_SEC}s from userspace/total; boot_total_effective_sec=$TOTAL_EFF" +fi +if [ -n "$EXCL_SVC_SEC" ]; then + log_info "Excluded services from userspace/total (sum=${EXCL_SVC_SEC}s): $EXCL_SVC_DETAIL boot_total_effective_sec=$TOTAL_EFF" +fi + +# KPI printout (console + file) +kpi_file="$OUT_DIR/boot_kpi_this_run.txt" +{ + echo "Boot KPI (this run)" + echo " boot_type : $BOOT_TYPE" + echo " iterations : $BOOT_KPI_ITERATIONS" + echo " clocksource : $CLOCKSOURCE" + echo " uefi_time_sec : $UEFI_TOTAL (Init=$UEFI_INITs, Exec=$UEFI_EXECs)" + echo " firmware_time_sec : ${FIRMWARE_SEC:-unknown}" + echo " bootloader_time_sec : ${LOADER_SEC:-unknown}" + echo " kernel_time_sec : ${KERNEL_SEC:-unknown}" + echo " userspace_time_sec : ${USERSPACE_SEC:-unknown}" + echo " userspace_effective_time_sec : ${USERSPACE_EFF:-unknown}" + echo " 
boot_total_sec : ${TOTAL_SEC:-unknown}" + echo " boot_total_effective_sec : ${TOTAL_EFF:-unknown}" +} | tee "$kpi_file" + +log_info "Boot KPI breakdown (this run) → $kpi_file" + +# ---------- VERBOSE: dump key .txt artifacts to console ---------- +if [ "$VERBOSE" -eq 1 ]; then + log_info "Verbose mode: dumping text artifacts from $OUT_DIR (excluding journal_*.txt)" + for f in "$OUT_DIR"/*.txt; do + [ -f "$f" ] || continue + base=$(basename "$f") + case "$base" in + journal_*.txt) + # Skip huge journal files in verbose mode + continue + ;; + esac + echo "===== $base =====" + cat "$f" + echo + done +fi + +# ---------- final PASS/FAIL ---------- +if [ "$suite_rc" -eq 0 ]; then + log_pass "$TESTNAME: PASS" + echo "$TESTNAME PASS" >"$RES_FILE" +else + log_fail "$TESTNAME: FAIL" + echo "$TESTNAME FAIL" >"$RES_FILE" +fi + +# restore_governor via trap +exit "$suite_rc" From 99624c73944ec15bad6c8c6e2bd62b58c19fd419 Mon Sep 17 00:00:00 2001 From: Srikanth Muppandam Date: Mon, 8 Dec 2025 11:31:56 +0530 Subject: [PATCH 3/3] Boot_Systemd_KPI_Loop: decouple KPI loop from boot transaction Switch auto-reboot orchestration to a oneshot systemd service + timer. Let the loop manage iterations/state while keeping the boot path clean. Ensure the KPI service exits quickly so FinishTimestampMonotonic is not blocked. Document usage while preserving behavior for manual single-shot runs. 
Signed-off-by: Srikanth Muppandam --- .../Boot_Systemd_KPI_Loop.yaml | 37 ++ .../Systemd_Boot_KPI_Tests_Overview.md | 514 ++++++++++++++++++ .../Performance/Boot_Systemd_KPI_Loop/run.sh | 388 +++++++++++++ 3 files changed, 939 insertions(+) create mode 100755 Runner/suites/Performance/Boot_Systemd_KPI_Loop/Boot_Systemd_KPI_Loop.yaml create mode 100644 Runner/suites/Performance/Boot_Systemd_KPI_Loop/Systemd_Boot_KPI_Tests_Overview.md create mode 100755 Runner/suites/Performance/Boot_Systemd_KPI_Loop/run.sh diff --git a/Runner/suites/Performance/Boot_Systemd_KPI_Loop/Boot_Systemd_KPI_Loop.yaml b/Runner/suites/Performance/Boot_Systemd_KPI_Loop/Boot_Systemd_KPI_Loop.yaml new file mode 100755 index 00000000..aaac3e14 --- /dev/null +++ b/Runner/suites/Performance/Boot_Systemd_KPI_Loop/Boot_Systemd_KPI_Loop.yaml @@ -0,0 +1,37 @@ +metadata: + name: boot-systemd-kpi-loop + format: "Lava-Test Test Definition 1.0" + description: "Multi-boot KPI aggregator wrapper for Boot_Systemd_Validate with optional auto-reboot orchestration." 
+ os: + - linux + scope: + - performance + - functional + +params: + # Where the child KPI script lives (Boot_Systemd_Validate) + KPI_SCRIPT: "./../Boot_Systemd_Validate/run.sh" + KPI_OUT_DIR: "./../Boot_Systemd_Validate/logs_Boot_Systemd_Validate" + + # Averaging window / iteration collection + ITERATIONS: "5" + BOOT_TYPE: "cold" # cold|warm|unknown etc + + # Forwarded knobs to Boot_Systemd_Validate + DISABLE_GETTY: "1" # 1|0 + DISABLE_SSHD: "0" # 1|0 + EXCLUDE_NETWORKD_WAIT_ONLINE: "1" # 1|0 + EXCLUDE_SERVICES: "" # space-separated service names + NO_SVG: "1" # 1 disables svg + VERBOSE: "0" # 1 dumps key artifacts + + # Orchestration + AUTO_REBOOT: "0" # 1 enables loop orchestration + REBOOT_RESULT_MODE: "PASS" # PASS (default) or SKIP when reboot requested mid-loop + +run: + steps: + - REPO_PATH=$PWD + - cd Runner/suites/Performance/Boot_Systemd_KPI_Loop/ + - ./run.sh || true + - $REPO_PATH/Runner/utils/send-to-lava.sh Boot_Systemd_KPI_Loop.res || true diff --git a/Runner/suites/Performance/Boot_Systemd_KPI_Loop/Systemd_Boot_KPI_Tests_Overview.md b/Runner/suites/Performance/Boot_Systemd_KPI_Loop/Systemd_Boot_KPI_Tests_Overview.md new file mode 100644 index 00000000..01036f7d --- /dev/null +++ b/Runner/suites/Performance/Boot_Systemd_KPI_Loop/Systemd_Boot_KPI_Tests_Overview.md @@ -0,0 +1,514 @@ +Systemd Boot KPI: How to Use the Two Tests +========================================== + +We provide two complementary tests for measuring systemd boot KPIs: + +1. **Per-boot KPI collector** + `Boot_Systemd_Validate/run.sh` +2. **Reboot loop wrapper / KPI aggregator** + `Boot_Systemd_KPI_Loop/run.sh` + +They are designed to work together but serve **different use-cases**. + +Typical paths in qcom-linux-testkit: + +```text +suites/Performance/Boot_Systemd_Validate/run.sh +suites/Performance/Boot_Systemd_KPI_Loop/run.sh +``` + +--- + +1. 
`Boot_Systemd_Validate` – Per-boot KPI collector +--------------------------------------------------- + +**Path (example):** + +```text +suites/Performance/Boot_Systemd_Validate/run.sh +``` + +### Purpose + +Runs **once per boot** and collects detailed systemd boot KPIs: + +- `systemd-analyze time` (parsed into firmware/loader/kernel/userspace/total) +- `systemd-analyze blame` (full + top-20) +- `systemd-analyze critical-chain` +- `systemd-analyze plot` → `boot_analysis.svg` (optional) +- `systemd-analyze dot` → `boot.dot` +- `systemctl` unit dependency trees and per-unit state CSV +- Journals: full boot, warnings, errors (when `journalctl` is available) +- Optional **gating on required units** (e.g. “all critical services must be active”) +- **UEFI loader timings** from efivars (Init/Exec/Total) when EFI vars exist +- **Exclusion of slow services** from userspace/total (e.g. `systemd-networkd-wait-online.service`) + +All logs are stored under a test-local directory: + +```text +./logs_Boot_Systemd_Validate/ +``` + +When `--iterations N` is passed, the script still runs **once**, but includes +this hint in the KPI output so that the KPI loop wrapper knows the intended +window size. + +--- + +### Usage (CLI help) + +The script has a built-in help that matches the implementation: + +```text +Usage: ./run.sh [OPTIONS] + +Options: + --out DIR Output directory for logs (default: ./logs_Boot_Systemd_Validate) + --required FILE File listing systemd units that must become active + --timeout S Timeout per required unit (seconds, default: $TIMEOUT_PER_UNIT) + --no-svg Skip systemd-analyze plot SVG generation + --boot-type TYPE Tag boot type (e.g. cold, warm, unknown) + --disable-getty Disable serial-getty@ttyS0.service for this KPI run + --disable-sshd Disable sshd.service for this KPI run + + --exclude-networkd-wait-online + Exclude systemd-networkd-wait-online.service time + from userspace/total based on systemd-analyze blame + + --exclude-services "svc1 svc2 ..." 
+ Exclude one or more services (matching names in + systemd-analyze blame) from userspace/total. + The summed time is subtracted and reported as + an effective KPI. + + --iterations N Hint for KPI iterations (wrapper/LAVA metadata; this + script still runs once per invocation) + + --verbose Dump key .txt artifacts from OUT_DIR to console for + LAVA debugging (skips large journal_*.txt files) + + -h, --help Show this help and exit +``` + +**Environment knobs (optional):** + +- `TIMEOUT_PER_UNIT` – default per-unit wait time for `--required` +- `SVG=yes|no` – default for SVG generation (overridden by `--no-svg`) +- `BOOT_TYPE` – default boot type tag (overridden by `--boot-type`) +- `BOOT_KPI_ITERATIONS` – default for the `iterations` field in the KPI output + +--- + +### Outputs / Artifacts + +All written under `OUT_DIR` (default: `./logs_Boot_Systemd_Validate`): + +- Platform + metadata + - `platform.txt`, `platform.json` + - `clocksource.txt` (current clocksource) + - `boot_type.txt` (e.g. 
`cold`, `warm`, `unknown`) + +- Units & dependencies + - `sysinit_deps.txt`, `basic_deps.txt` + - `units.list` + - `unit_states.csv` (per-unit state/export from `systemctl show`) + +- Systemd timing & graphs + - `analyze_time.txt` (raw `systemd-analyze time` output) + - `blame.txt`, `blame_top20.txt` + - `critical_chain.txt` + - `boot_analysis.svg` (unless `--no-svg`) + - `boot.dot` + +- Journals + - `journal_boot.txt` – full boot journal + - `journal_warn.txt` – warnings and above + - `journal_err.txt` – errors and above + +- Bootchart (if enabled via `init=/lib/systemd/systemd-bootchart`) + - `bootchart.tgz` (if present under `/run/log/...`) + +- Required units + - `failed_units.txt` (from `systemctl --failed`) + +- **KPI breakdown (this run)** + - `boot_kpi_this_run.txt` – structured, human-readable KPI summary + +--- + +### KPI breakdown: fields and exclusions + +At the end of the run, the script prints a KPI summary **to console** and +writes the same content into `boot_kpi_this_run.txt`, for example: + +```text +Boot KPI (this run) + boot_type : cold + iterations : 5 + clocksource : arch_sys_counter + uefi_time_sec : 438093.283 (Init=214751.707, Exec=223341.576) + firmware_time_sec : 3.765 + bootloader_time_sec : 0.176 + kernel_time_sec : 6.124 + userspace_time_sec : 126.942 + userspace_effective_time_sec : 6.825 + boot_total_sec : 137.008 + boot_total_effective_sec : 16.891 +``` + +Fields: + +- `uefi_time_sec` + Sum of UEFI loader Init+Exec time in seconds, derived from EFI vars: + + - `LoaderTimeInitUSec-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f` + - `LoaderTimeExecUSec-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f` + + with individual Init/Exec components also printed. 
+ +- `firmware_time_sec`, `bootloader_time_sec`, `kernel_time_sec`, + `userspace_time_sec`, `boot_total_sec` + Parsed from `systemd-analyze time`: + + ```text + Startup finished in 3.801s (firmware) + 174ms (loader) + 6.106s (kernel) + 2min 7.045s (userspace) = 2min 17.127s + ``` + +- `userspace_effective_time_sec`, `boot_total_effective_sec` + + These are derived from the raw userspace/total time by subtracting: + + 1. `systemd-networkd-wait-online.service` time when + `--exclude-networkd-wait-online` is passed. + 2. Any additional services given via `--exclude-services "svc1 svc2"`. + +The script logs exclusions clearly, for example: + +```text +[INFO] ... Excluded systemd-networkd-wait-online.service=120.117s from userspace/total; boot_total_effective_sec=16.891 +[INFO] ... Excluded services from userspace/total (sum=2.500s): docker.service=0.966s; NetworkManager.service=1.534s; boot_total_effective_sec=14.391 +``` + +If `systemd-analyze time` reports: + +```text +Bootup is not yet finished (org.freedesktop.systemd1.Manager.FinishTimestampMonotonic=0). +``` + +the script: + +- Marks the timing fields as `unknown`. +- Logs the active jobs from `systemctl list-jobs` to **console** so that + blocking services (including our own KPI service if misconfigured) are + visible during LAVA debugging. + +This diagnostic logging happens **even without `--verbose`**. + +--- + +### Verbose mode (`--verbose`) + +When `--verbose` is set, the script: + +- Prints all “reasonable” `.txt` artifacts from `OUT_DIR` to console + (excluding `journal_*.txt` for size reasons). +- This is intended for LAVA and other CI where you cannot easily inspect the + filesystem but can scroll the job log. + +Example tail of the verbose section: + +```text +[INFO] ... Verbose mode: dumping text artifacts from ./logs_Boot_Systemd_Validate (excluding journal_*.txt) +===== analyze_time.txt ===== +Startup finished in ... +... +===== boot_kpi_this_run.txt ===== +Boot KPI (this run) + ... 
+``` +--- + +### Typical usage examples + +**1) Basic per-boot KPI with required units** + +```sh +./run.sh --timeout 60 --required required-units.txt +``` + +**2) Cold-boot KPI, excluding networkd-wait-online + Docker/Weston** + +```sh +./run.sh --boot-type cold --disable-getty --exclude-networkd-wait-online --exclude-services "docker.service weston.service" +``` + +**3) LAVA-friendly verbose run** + +```sh +./run.sh --boot-type warm --disable-getty --exclude-networkd-wait-online --iterations 5 --verbose +``` + +In all cases, the main KPI is in `logs_Boot_Systemd_Validate/boot_kpi_this_run.txt` +and echoed to console. + +--- + +2. `Boot_Systemd_KPI_Loop` – Reboot loop wrapper & KPI aggregator +----------------------------------------------------------------- + +**Path (example):** + +```text +suites/Performance/Boot_Systemd_KPI_Loop/run.sh +``` + +### Purpose + +A **thin wrapper** that drives multiple KPI iterations across reboots and +computes averages over the last **N boots** of a given `boot_type`. + +On each (re)boot it: + +1. Loads state from `Boot_Systemd_KPI_Loop.state` (if present) to determine: + - Total iterations requested + - Iterations already completed + - Boot type & options + - KPI script path + base out dir +2. Computes **this iteration index**, and a per-iteration out dir: + + ```text + KPI_OUT_DIR/iter_N + ``` + +3. Calls `Boot_Systemd_Validate/run.sh` once with: + - `--out KPI_OUT_DIR/iter_N` + - `--boot-type TYPE` + - `--iterations N` + - Forwarded flags (`--disable-getty`, `--exclude-...`, `--verbose`, etc.) +4. Parses `boot_kpi_this_run.txt` for this iteration, appends a row into: + + ```text + Boot_Systemd_KPI_stats.csv + ``` + +5. Computes averages over the last **N entries** for this `boot_type` and writes: + + ```text + Boot_Systemd_KPI_summary.txt + ``` + +6.
In **auto-reboot mode**, if more iterations are pending: + - Updates `Boot_Systemd_KPI_Loop.state` + - Triggers a reboot + - A small systemd service (`boot-systemd-kpi-loop.service`) invokes this + script again on the next boot until all iterations complete. + +When all iterations finish, the wrapper: + +- Prints the KPI average summary to console. +- Leaves `Boot_Systemd_KPI_stats.csv` and `Boot_Systemd_KPI_summary.txt` for further analysis. +- Cleans up the systemd hook + state file in auto-reboot mode. + +--- + +### Usage (CLI help) + +```text +Usage: ./run.sh [OPTIONS] + +This wrapper: + * Runs Boot_Systemd_Validate once for the *current boot* + * Uses a per-iteration KPI out dir when --iterations > 1: + base: ../Boot_Systemd_Validate/logs_Boot_Systemd_Validate + iter: BASE/iter_N + * Parses boot_kpi_this_run.txt from that test + * Appends a row into Boot_Systemd_KPI_stats.csv + * Computes averages over the last N boots (per boot_type) and prints summary. + +Options: + --kpi-script PATH Override Boot_Systemd_Validate script path + (default: ../Boot_Systemd_Validate/run.sh) + + --kpi-out-dir DIR Override base KPI output dir + (default: ../Boot_Systemd_Validate/logs_Boot_Systemd_Validate) + + --iterations N Number of boots to average over (default: 1) + --boot-type TYPE Tag for this run (e.g. cold, warm, unknown) + + # Options forwarded to Boot_Systemd_Validate: + --disable-getty Disable serial-getty@ttyS0.service + --disable-sshd Disable sshd.service + --exclude-networkd-wait-online + Exclude systemd-networkd-wait-online.service + --exclude-services "A B" + Exclude these services from userspace/total + --no-svg Disable SVG plot generation + --verbose Print KPI .txt artifacts to console for debug + + # Auto-reboot orchestration: + --auto-reboot Install systemd hook and auto-reboot until + --iterations boots are collected.
State is + stored in: Boot_Systemd_KPI_Loop.state + + -h, --help Show this help and exit +``` + +--- + +### Files written by the loop wrapper + +Under the same directory as `Boot_Systemd_KPI_Loop/run.sh`: + +- `Boot_Systemd_KPI_Loop.res` + PASS/FAIL (or SKIP when `REBOOT_RESULT_MODE=SKIP`) status for the wrapper itself; written relative to the current working directory. + +- `Boot_Systemd_KPI_Loop.state` + Persistent state across reboots (total iterations, done so far, boot_type, + options, KPI script path/out dir). Removed automatically when all iterations + complete; it is kept if an iteration fails. + +- `Boot_Systemd_KPI_stats.csv` + Rolling KPI database across boots. Each row corresponds to the parsed + `boot_kpi_this_run.txt` of one boot (for a given `boot_type`). + +- `Boot_Systemd_KPI_summary.txt` + Human-readable summary of averages over the last **N** entries of that + `boot_type`, e.g.: + + ```text + Boot KPI summary (last 5 cold boot(s)) + entries_used : 5 + target_iterations : 5 + boot_type : cold + avg_uefi_time_sec : ... + avg_firmware_time_sec : ... + avg_bootloader_time_sec : ... + avg_kernel_time_sec : ... + avg_userspace_time_sec : ... + avg_userspace_effective_time_sec : ... + avg_boot_total_sec : ... + avg_boot_total_effective_sec : ... + ``` + +- `Boot_Systemd_KPI_Loop_stdout_*.log` + Stdout/stderr log(s) for the wrapper itself (if you preserve them). + +Per-iteration artifacts from `Boot_Systemd_Validate` live under: + +```text +../Boot_Systemd_Validate/logs_Boot_Systemd_Validate/iter_1/ +../Boot_Systemd_Validate/logs_Boot_Systemd_Validate/iter_2/ +... +``` + +Each `iter_N` has its own `boot_kpi_this_run.txt`, `analyze_time.txt`, etc. + +--- + +### Auto-reboot mode details + +When `--auto-reboot` is passed: + +- The wrapper installs a small systemd service (e.g. `boot-systemd-kpi-loop.service`) + that runs the wrapper at boot. +- On each boot, the wrapper: + - Runs `Boot_Systemd_Validate` once. + - Updates the `.state` file with the new iteration count. + - If more iterations are required, it requests `reboot` again.
+- After the final iteration: + - KPI averages are computed and printed. + - The systemd hook is removed. + - The state file is deleted. + +The reboot logic is designed to: + +- Ensure the reboot actually happens (falling back between `reboot` and `/sbin/reboot`). +- Avoid blocking `systemd-analyze` permanently: the KPI scripts finish quickly, + and if any unit (including our own) prevents boot from completing, it will + show up in the “Bootup is not yet finished … list-jobs” diagnostics inside + each `iter_N/analyze_time.txt` and in the **console logs**. + +--- + +### Typical usage examples + +**1) Manual KPI over last 5 cold boots (no auto-reboot)** + +You manually reboot the board between runs: + +```sh +# Boot 1 (cold boot) +./run.sh --iterations 5 --boot-type cold --disable-getty --exclude-networkd-wait-online + +# Reboot the board manually (power-cycle or reboot) + +# Boot 2..5 – re-run the same command each time +./run.sh --iterations 5 --boot-type cold --disable-getty --exclude-networkd-wait-online +... +``` + +After the 5th run, `Boot_Systemd_KPI_summary.txt` will contain the averages over +the last 5 `cold` entries. + +**2) Fully automated cold-boot KPI campaign (auto-reboot)** + +```sh +./run.sh --iterations 5 --boot-type cold --disable-getty --exclude-networkd-wait-online --auto-reboot +``` + +The wrapper will: + +- Run `Boot_Systemd_Validate` on this boot. +- Reboot automatically until 5 iterations are captured. +- Finally, print a KPI summary and clean up the systemd hook/state. + +**3) Warm-boot KPI with extra service exclusions and verbose logs** + +```sh +./run.sh --iterations 3 --boot-type warm --disable-getty --exclude-networkd-wait-online --exclude-services "docker.service weston.service" --auto-reboot --verbose +``` + +This gives: + +- Per-iteration directories: `iter_1`, `iter_2`, `iter_3`. +- Detailed logs printed to console from `Boot_Systemd_Validate` via `--verbose`. +- Aggregated averages in `Boot_Systemd_KPI_summary.txt`. + +--- + +3. 
Which one should I use? +-------------------------- + +| Scenario | Recommended test | Notes | +|----------------------------------------------|---------------------------------------|-----------------------------------------------------------------------| +| Standard CI pipeline (no reboot-resume) | `Boot_Systemd_Validate` | Run once per job; no reboot inside the script. | +| Manual KPI measurement on a single boot | `Boot_Systemd_Validate` | E.g. after changing kernel/systemd configs. | +| Quick health-check of systemd units | `Boot_Systemd_Validate` | Use `--required` to gate on critical services. | +| Lab KPI across N cold/warm boots | `Boot_Systemd_KPI_Loop` | Wrapper handles per-boot dirs + CSV + averages; you may reboot manually. | +| Automated multi-boot campaign in lab | `Boot_Systemd_KPI_Loop` with `--auto-reboot` | State file + systemd hook handle the full loop. | +| CI with explicit reboot-resume support | `Boot_Systemd_KPI_Loop` (if allowed) | CI must re-run the script after each reboot. | + +--- + +4. Design principles +-------------------- + +- **Single responsibility** + - `Boot_Systemd_Validate`: _measure one boot and emit KPIs_. + - `Boot_Systemd_KPI_Loop`: _across boots: state, reboots, aggregation_. + +- **CI friendliness** + - CI that cannot handle reboots should only use `Boot_Systemd_Validate`. + - Reboot orchestration via `--auto-reboot` is explicitly opt-in. + +- **Robust & transparent** + - Rolling CSV + summary for long-term trends. + - Clear console logs for: + - service time exclusions, + - non-finished boots (`Bootup is not yet finished` + `systemctl list-jobs`), + - per-iteration KPI values. + +- **Local logs only** + - All artifacts (CSV, SVG, journals, etc.) are stored under the test’s + working directory, making log collection and LAVA parsing straightforward. 
diff --git a/Runner/suites/Performance/Boot_Systemd_KPI_Loop/run.sh b/Runner/suites/Performance/Boot_Systemd_KPI_Loop/run.sh
new file mode 100755
index 00000000..0c836166
--- /dev/null
+++ b/Runner/suites/Performance/Boot_Systemd_KPI_Loop/run.sh
@@ -0,0 +1,419 @@
+#!/bin/sh
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause-Clear
+# Boot KPI multi-boot aggregator / auto-reboot wrapper around Boot_Systemd_Validate.
+#
+# One invocation handles one boot:
+#   1. Source init_env, functestlib.sh and lib_performance.sh.
+#   2. Parse env/CLI knobs; in auto-reboot mode resume them from STATE_FILE.
+#   3. Run the per-boot KPI script once and parse its boot_kpi_this_run.txt.
+#   4. Append a row to STATS_CSV and recompute the averages in SUMMARY_FILE.
+#   5. In auto-reboot mode, persist progress and reboot, or clean up when done.
+
+SCRIPT_DIR="$(
+  cd "$(dirname "$0")" || exit 1
+  pwd
+)"
+
+TESTNAME="Boot_Systemd_KPI_Loop"
+RES_FILE="./${TESTNAME}.res"
+
+# Default KPI script + base out dir (for iteration subfolders)
+KPI_SCRIPT_DEFAULT="$SCRIPT_DIR/../Boot_Systemd_Validate/run.sh"
+KPI_OUT_DIR_DEFAULT="$SCRIPT_DIR/../Boot_Systemd_Validate/logs_Boot_Systemd_Validate"
+
+# Defaults (env may override; CLI parsing later overrides again)
+KPI_SCRIPT="${KPI_SCRIPT:-$KPI_SCRIPT_DEFAULT}"
+KPI_OUT_DIR="${KPI_OUT_DIR:-$KPI_OUT_DIR_DEFAULT}"
+ITERATIONS="${ITERATIONS:-1}"
+BOOT_TYPE="${BOOT_TYPE:-unknown}"
+
+DISABLE_GETTY="${DISABLE_GETTY:-0}"
+DISABLE_SSHD="${DISABLE_SSHD:-0}"
+EXCLUDE_NETWORKD_WAIT_ONLINE="${EXCLUDE_NETWORKD_WAIT_ONLINE:-0}"
+EXCLUDE_SERVICES="${EXCLUDE_SERVICES:-}"
+NO_SVG="${NO_SVG:-0}"
+AUTO_REBOOT="${AUTO_REBOOT:-0}"
+VERBOSE="${VERBOSE:-0}"
+
+STATE_FILE="$SCRIPT_DIR/Boot_Systemd_KPI_Loop.state"
+KPI_REBOOT_STATE_FILE="$SCRIPT_DIR/Boot_Systemd_KPI_reboot.state"
+SERVICE_NAME="${SERVICE_NAME:-boot-systemd-kpi-loop}"
+STATS_CSV="$SCRIPT_DIR/Boot_Systemd_KPI_stats.csv"
+SUMMARY_FILE="$SCRIPT_DIR/Boot_Systemd_KPI_summary.txt"
+
+# Optional: allow caller to choose whether to treat reboot as PASS/SKIP in LAVA
+# (default PASS so LAVA won't fail the run during reboot)
+REBOOT_RESULT_MODE="${REBOOT_RESULT_MODE:-PASS}"
+
+# Print the CLI help text on stdout; callers redirect it where needed.
+usage() {
+  cat <<EOF
+Usage: $0 [OPTIONS]
+
+This wrapper:
+  * Runs Boot_Systemd_Validate once for the *current boot*
+  * Uses a per-iteration KPI out dir when --iterations > 1:
+      base: $KPI_OUT_DIR_DEFAULT
+      iter: BASE/iter_N
+  * Parses boot_kpi_this_run.txt from that test
+  * Appends a row into ${STATS_CSV##*/}
+  * Computes averages over the last N boots (per boot_type) and prints summary.
+
+Options:
+  --kpi-script PATH     Override Boot_Systemd_Validate script path
+                        (default: $KPI_SCRIPT_DEFAULT)
+  --kpi-out-dir DIR     Override base KPI output dir
+                        (default: $KPI_OUT_DIR_DEFAULT)
+  --iterations N        Number of boots to average over (default: 1)
+  --boot-type TYPE      Tag for this run (e.g. cold, warm, unknown)
+
+  # Options forwarded to Boot_Systemd_Validate:
+  --disable-getty       Disable serial-getty@ttyS0.service
+  --disable-sshd        Disable sshd.service
+  --exclude-networkd-wait-online  Exclude systemd-networkd-wait-online.service
+  --exclude-services "A B"        Exclude these services from userspace/total
+  --no-svg              Disable SVG plot generation
+  --verbose             Print KPI .txt artifacts to console for debug
+
+  # Auto-reboot orchestration:
+  --auto-reboot         Install systemd hook and auto-reboot until
+                        --iterations boots are collected. State is
+                        stored in: $STATE_FILE
+
+  -h, --help            Show this help and exit
+
+Example (single run, average over last 5 boots of this type):
+  ./run.sh --iterations 5 --boot-type cold --disable-getty --exclude-networkd-wait-online
+
+Auto-reboot mode (script installs systemd hook + reboots until N boots done):
+  ./run.sh --iterations 5 --boot-type cold --disable-getty \\
+    --exclude-networkd-wait-online --auto-reboot
+EOF
+}
+
+# EARLY help handling: do this BEFORE init_env/functestlib stdout capture
+case "${1:-}" in
+  -h|--help)
+    usage >&2
+    exit 0
+    ;;
+esac
+
+# --- locate and source init_env → functestlib.sh + lib_performance.sh ---
+INIT_ENV=""
+SEARCH="$SCRIPT_DIR"
+
+while [ "$SEARCH" != "/" ]; do
+  if [ -f "$SEARCH/init_env" ]; then
+    INIT_ENV="$SEARCH/init_env"
+    break
+  fi
+  SEARCH=$(dirname "$SEARCH")
+done
+
+if [ -z "$INIT_ENV" ]; then
+  echo "[ERROR] Could not find init_env (starting at $SCRIPT_DIR)" >&2
+  exit 1
+fi
+
+# Only source once (idempotent)
+# NOTE: We intentionally **do not export** any new vars. They stay local to this shell.
+if [ -z "${__INIT_ENV_LOADED:-}" ]; then
+  # shellcheck disable=SC1090
+  . "$INIT_ENV"
+  __INIT_ENV_LOADED=1
+fi
+
+# shellcheck disable=SC1090
+. "$INIT_ENV"
+# shellcheck disable=SC1091
+. "$TOOLS/functestlib.sh"
+# shellcheck disable=SC1091
+. "$TOOLS/lib_performance.sh"
+
+# Abort with usage when a value-taking option is the last word on the command
+# line. Without this check the parser would shift past the end of "$@", which
+# POSIX allows a non-interactive shell to treat as a fatal error.
+#   $1 - option name (for the message)
+#   $2 - number of words left in "$@", including the option itself
+require_arg() {
+  if [ "$2" -lt 2 ]; then
+    log_warn "Option $1 requires an argument"
+    usage >&2
+    echo "$TESTNAME FAIL" >"$RES_FILE"
+    exit 1
+  fi
+}
+
+# --- CLI parsing ---
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --kpi-script)
+      require_arg "$1" "$#"
+      shift
+      KPI_SCRIPT=$1
+      ;;
+    --kpi-out-dir)
+      require_arg "$1" "$#"
+      shift
+      KPI_OUT_DIR=$1
+      ;;
+    --iterations)
+      require_arg "$1" "$#"
+      shift
+      ITERATIONS=$1
+      ;;
+    --boot-type)
+      require_arg "$1" "$#"
+      shift
+      BOOT_TYPE=$1
+      ;;
+    --disable-getty)
+      DISABLE_GETTY=1
+      ;;
+    --disable-sshd)
+      DISABLE_SSHD=1
+      ;;
+    --exclude-networkd-wait-online)
+      EXCLUDE_NETWORKD_WAIT_ONLINE=1
+      ;;
+    --exclude-services)
+      require_arg "$1" "$#"
+      shift
+      EXCLUDE_SERVICES=$1
+      ;;
+    --no-svg)
+      NO_SVG=1
+      ;;
+    --auto-reboot)
+      AUTO_REBOOT=1
+      ;;
+    --verbose)
+      VERBOSE=1
+      ;;
+    -h|--help)
+      usage >&2
+      exit 0
+      ;;
+    *)
+      log_warn "Unknown option: $1"
+      usage >&2
+      echo "$TESTNAME FAIL" >"$RES_FILE"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+# Validate iterations
+case "$ITERATIONS" in
+  ''|*[!0-9]*)
+    log_warn "Non-numeric --iterations; defaulting to 1"
+    ITERATIONS=1
+    ;;
+esac
+if [ "$ITERATIONS" -lt 1 ] 2>/dev/null; then
+  ITERATIONS=1
+fi
+
+# NEW: auto-enable auto-reboot mode when state exists
+if [ "$AUTO_REBOOT" -eq 0 ] && [ -f "$STATE_FILE" ]; then
+  AUTO_REBOOT=1
+fi
+
+# If we are in auto-reboot mode, first verify whether a previous reboot actually happened.
+if [ "$AUTO_REBOOT" -eq 1 ]; then
+  perf_kpi_check_previous_reboot "$KPI_REBOOT_STATE_FILE"
+fi
+
+# Always log current boot identity for debugging / LAVA traces
+perf_kpi_get_boot_identity
+log_info "$TESTNAME: boot identity → boot_id=${PERF_KPI_BOOT_ID:-unknown} uptime=${PERF_KPI_UPTIME_SEC:-unknown}s"
+
+CURRENT_DONE=0
+STATE_LOADED=0
+
+# --- Auto-reboot: resume knobs from persisted state (if any) ---
+# Done before validating KPI_SCRIPT: values loaded from the state file replace
+# this invocation's env/CLI values, so they are the ones that must be checked.
+if [ "$AUTO_REBOOT" -eq 1 ] && perf_kpi_load_loop_state "$STATE_FILE"; then
+  STATE_LOADED=1
+  # Reuse knobs from state
+  if [ -n "${KPI_LOOP_ITERATIONS_TOTAL:-}" ]; then
+    ITERATIONS=$KPI_LOOP_ITERATIONS_TOTAL
+  fi
+  if [ -n "${KPI_LOOP_BOOT_TYPE:-}" ]; then
+    BOOT_TYPE=$KPI_LOOP_BOOT_TYPE
+  fi
+  if [ -n "${KPI_LOOP_KPI_SCRIPT:-}" ]; then
+    KPI_SCRIPT=$KPI_LOOP_KPI_SCRIPT
+  fi
+  if [ -n "${KPI_LOOP_KPI_OUT_DIR:-}" ]; then
+    KPI_OUT_DIR=$KPI_LOOP_KPI_OUT_DIR
+  fi
+  DISABLE_GETTY=${KPI_LOOP_DISABLE_GETTY:-0}
+  DISABLE_SSHD=${KPI_LOOP_DISABLE_SSHD:-0}
+  EXCLUDE_NETWORKD_WAIT_ONLINE=${KPI_LOOP_EXCLUDE_NETWORKD:-0}
+  EXCLUDE_SERVICES=${KPI_LOOP_EXCLUDE_SERVICES:-}
+  CURRENT_DONE=${KPI_LOOP_ITERATIONS_DONE:-0}
+fi
+
+# Validate KPI script
+if [ ! -x "$KPI_SCRIPT" ]; then
+  log_error "KPI script not executable or missing: $KPI_SCRIPT"
+  echo "$TESTNAME FAIL" >"$RES_FILE"
+  exit 1
+fi
+
+mkdir -p "$KPI_OUT_DIR" 2>/dev/null || true
+
+# --- Auto-reboot: first run, initialise state and install the boot hook ---
+if [ "$AUTO_REBOOT" -eq 1 ] && [ "$STATE_LOADED" -eq 0 ]; then
+  perf_kpi_write_loop_state "$STATE_FILE" "$ITERATIONS" "$CURRENT_DONE" \
+    "$BOOT_TYPE" "$DISABLE_GETTY" "$DISABLE_SSHD" \
+    "$EXCLUDE_NETWORKD_WAIT_ONLINE" "$EXCLUDE_SERVICES" \
+    "$KPI_SCRIPT" "$KPI_OUT_DIR"
+  perf_install_kpi_systemd_hook "$SCRIPT_DIR/run.sh" "$SERVICE_NAME"
+fi
+
+log_info "$TESTNAME: starting KPI aggregation (boot_type=$BOOT_TYPE, iterations_window=$ITERATIONS, auto_reboot=$AUTO_REBOOT, verbose=$VERBOSE)"
+log_info "$TESTNAME: KPI script → $KPI_SCRIPT"
+log_info "$TESTNAME: KPI base out dir → $KPI_OUT_DIR"
+log_info "$TESTNAME: iterations already done (from state) = $CURRENT_DONE"
+
+# --- Determine this iteration index and concrete out-dir ---
+THIS_ITER=1
+if [ "$AUTO_REBOOT" -eq 1 ]; then
+  THIS_ITER=$((CURRENT_DONE + 1))
+fi
+
+RUN_OUT_DIR="$KPI_OUT_DIR"
+if [ "$ITERATIONS" -gt 1 ] 2>/dev/null; then
+  RUN_OUT_DIR="$KPI_OUT_DIR/iter_${THIS_ITER}"
+fi
+mkdir -p "$RUN_OUT_DIR" 2>/dev/null || true
+log_info "$TESTNAME: this iteration=$THIS_ITER, KPI out dir for this run → $RUN_OUT_DIR"
+
+# --- Build argv for Boot_Systemd_Validate ---
+# CLI parsing is finished, so the positional parameters are reused as the
+# argument list. Expanding "$@" hands every value to the KPI script as exactly
+# one word, even if it contains spaces or shell metacharacters.
+set -- --out "$RUN_OUT_DIR" --boot-type "$BOOT_TYPE" --iterations "$ITERATIONS"
+if [ "$DISABLE_GETTY" -eq 1 ]; then
+  set -- "$@" --disable-getty
+fi
+if [ "$DISABLE_SSHD" -eq 1 ]; then
+  set -- "$@" --disable-sshd
+fi
+if [ "$EXCLUDE_NETWORKD_WAIT_ONLINE" -eq 1 ]; then
+  set -- "$@" --exclude-networkd-wait-online
+fi
+if [ -n "$EXCLUDE_SERVICES" ]; then
+  set -- "$@" --exclude-services "$EXCLUDE_SERVICES"
+fi
+if [ "$NO_SVG" -eq 1 ]; then
+  set -- "$@" --no-svg
+fi
+if [ "$VERBOSE" -eq 1 ]; then
+  set -- "$@" --verbose
+fi
+
+# --- Invoke Boot_Systemd_Validate for this boot ---
+log_info "$TESTNAME: invoking KPI script: $KPI_SCRIPT $*"
+
+"$KPI_SCRIPT" "$@"
+rc=$?
+
+if [ "$rc" -ne 0 ]; then
+  log_fail "$TESTNAME: KPI script failed with rc=$rc"
+  echo "$TESTNAME FAIL" >"$RES_FILE"
+  exit "$rc"
+fi
+
+# --- Parse this-run KPI file from this iteration OUT dir ---
+KPI_FILE="$RUN_OUT_DIR/boot_kpi_this_run.txt"
+if [ ! -f "$KPI_FILE" ]; then
+  log_fail "$TESTNAME: KPI file not found for this iteration: $KPI_FILE"
+  echo "$TESTNAME FAIL" >"$RES_FILE"
+  exit 1
+fi
+
+perf_kpi_extract_from_file "$KPI_FILE"
+
+# If Boot_Systemd_Validate wrote empty boot_type, fall back to CLI boot_type
+if [ -z "${PERF_KPI_BOOT_TYPE:-}" ]; then
+  PERF_KPI_BOOT_TYPE="$BOOT_TYPE"
+fi
+
+log_info "$TESTNAME: parsed KPI for this boot (iter=$THIS_ITER, boot_type=$PERF_KPI_BOOT_TYPE, total_sec=${PERF_KPI_BOOT_TOTAL_SEC:-unknown}, total_eff_sec=${PERF_KPI_BOOT_TOTAL_EFFECTIVE_SEC:-unknown})"
+
+if [ "$VERBOSE" -eq 1 ]; then
+  echo "================ boot_kpi_this_run.txt (from $KPI_FILE) ================"
+  cat "$KPI_FILE"
+  echo "======================================================================="
+fi
+
+# --- Append CSV row (global stats CSV under Boot_Systemd_KPI_Loop) ---
+perf_kpi_append_csv_row "$STATS_CSV" "$PERF_KPI_BOOT_TYPE"
+
+# --- Compute averages over last N boots for this boot_type ---
+if perf_kpi_compute_average "$STATS_CSV" "$PERF_KPI_BOOT_TYPE" "$ITERATIONS" "$SUMMARY_FILE"; then
+  if [ -f "$SUMMARY_FILE" ]; then
+    echo "================ KPI AVERAGE SUMMARY ================"
+    cat "$SUMMARY_FILE"
+    echo "====================================================="
+  fi
+else
+  log_warn "$TESTNAME: could not compute KPI averages (maybe not enough entries yet)."
+fi
+
+if [ "$VERBOSE" -eq 1 ]; then
+  if [ -f "$STATS_CSV" ]; then
+    echo "================ Last KPI CSV rows ($STATS_CSV) ======================="
+    tail -n 5 "$STATS_CSV" 2>/dev/null || cat "$STATS_CSV"
+    echo "======================================================================="
+  fi
+fi
+
+# --- Auto-reboot decision & cleanup ---
+if [ "$AUTO_REBOOT" -eq 1 ]; then
+  NEW_DONE=$((CURRENT_DONE + 1))
+  perf_kpi_write_loop_state "$STATE_FILE" "$ITERATIONS" "$NEW_DONE" \
+    "$BOOT_TYPE" "$DISABLE_GETTY" "$DISABLE_SSHD" \
+    "$EXCLUDE_NETWORKD_WAIT_ONLINE" "$EXCLUDE_SERVICES" \
+    "$KPI_SCRIPT" "$KPI_OUT_DIR"
+
+  if [ "$NEW_DONE" -lt "$ITERATIONS" ]; then
+    # Prepare reboot tracking state so next boot can verify it succeeded
+    perf_kpi_get_boot_identity
+    perf_kpi_reboot_state_save \
+      "$KPI_REBOOT_STATE_FILE" \
+      "$PERF_KPI_BOOT_ID" \
+      "${PERF_KPI_UPTIME_SEC:-}" \
+      "1" \
+      "$NEW_DONE"
+
+    log_info "$TESTNAME: completed iteration $NEW_DONE/$ITERATIONS; requesting reboot for next KPI iteration."
+    log_info "$TESTNAME: current boot_id=$PERF_KPI_BOOT_ID uptime=${PERF_KPI_UPTIME_SEC:-unknown}s"
+
+    perf_kpi_request_reboot "Boot_Systemd_KPI_Loop auto-reboot for next KPI iteration"
+
+    # If we are still alive here, reboot did not occur immediately; exit and let systemd/LAVA retry.
+    if [ "$REBOOT_RESULT_MODE" = "SKIP" ]; then
+      log_skip "$TESTNAME: reboot requested for next iteration ($NEW_DONE/$ITERATIONS)"
+      echo "$TESTNAME SKIP" >"$RES_FILE"
+    else
+      echo "$TESTNAME PASS" >"$RES_FILE"
+    fi
+    exit 0
+  else
+    log_info "$TESTNAME: all iterations completed ($NEW_DONE/$ITERATIONS); cleaning up auto-reboot hook."
+    perf_remove_kpi_systemd_hook "$SERVICE_NAME"
+    rm -f "$STATE_FILE" "$KPI_REBOOT_STATE_FILE" 2>/dev/null || true
+  fi
+fi
+
+log_pass "$TESTNAME: PASS"
+echo "$TESTNAME PASS" >"$RES_FILE"
+exit 0