From a2fec5e4c857a41d5674c2f42fa3d722fbe24804 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Wed, 21 Jan 2026 19:15:12 +0100 Subject: [PATCH 01/10] Translate comments and docs from Italian to English Updated code comments, documentation, and user-facing strings from Italian to English across multiple files for improved accessibility and consistency. No functional changes were made to the code. --- docs/EXAMPLES.md | 24 +++---- docs/RESTORE_GUIDE.md | 20 +++--- internal/logging/bootstrap.go | 20 +++--- internal/logging/logger.go | 58 ++++++++-------- internal/notify/telegram_registration.go | 6 +- internal/orchestrator/.backup.lock | 4 +- .../orchestrator/directory_recreation_test.go | 24 +++---- internal/orchestrator/extensions.go | 6 +- internal/orchestrator/restore_filesystem.go | 20 +++--- internal/orchestrator/restore_tui.go | 12 ++-- internal/orchestrator/storage_adapter.go | 4 +- internal/storage/retention_normalize.go | 8 +-- internal/types/common.go | 66 +++++++++---------- pkg/utils/files.go | 14 ++-- pkg/utils/strings.go | 12 ++-- 15 files changed, 149 insertions(+), 149 deletions(-) diff --git a/docs/EXAMPLES.md b/docs/EXAMPLES.md index c104a2c..0a53278 100644 --- a/docs/EXAMPLES.md +++ b/docs/EXAMPLES.md @@ -853,39 +853,39 @@ CLOUD_LOG_PATH= ## Example 9: Test in a Chroot/Fixture -**Scenario**: Esegui la raccolta su un root alternativo (chroot, snapshot montato, fixture di test) senza toccare il filesystem live. +**Scenario**: Run collection against an alternate system root (chroot, mounted snapshot, test fixture) without touching the live filesystem. **Use case**: -- CI/test di backup in ambiente isolato -- Analisi offline di un'immagine/snapshot montata -- Esecuzione in container che monta un root diverso +- CI/test backups in an isolated environment +- Offline analysis of a mounted image/snapshot +- Running inside a container that mounts a different root ### Configuration ```bash # configs/backup.env -SYSTEM_ROOT_PREFIX=/mnt/snapshot-root # punta al root alternativo +SYSTEM_ROOT_PREFIX=/mnt/snapshot-root # points to the alternate root BACKUP_ENABLED=true ENABLE_GO_BACKUP=true -# /etc, /var, /root, /home vengono risolti sotto il prefisso +# /etc, /var, /root, /home are resolved under the prefix ``` ### Setup Steps ```bash -# 1) Monta o prepara il root alternativo -mount /dev/vg0/snap /mnt/snapshot-root # esempio +# 1) Mount or prepare the alternate root +mount /dev/vg0/snap /mnt/snapshot-root # example -# 2) Esegui un dry-run +# 2) Run a dry-run SYSTEM_ROOT_PREFIX=/mnt/snapshot-root ./build/proxsave --dry-run -# 3) Esegui il backup reale (opzionale) +# 3) Run the actual backup (optional) SYSTEM_ROOT_PREFIX=/mnt/snapshot-root ./build/proxsave ``` ### Expected Results -- I file raccolti riflettono il contenuto di `/mnt/snapshot-root/etc`, `/var`, `/root`, `/home`, ecc. -- Nessuna scrittura sul filesystem live del nodo. +- Collected files reflect the contents of `/mnt/snapshot-root/etc`, `/var`, `/root`, `/home`, etc. +- No writes to the node's live filesystem. 
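A minimal Go sketch of the path-resolution behavior described in Example 9, assuming a hypothetical `resolveUnderRoot` helper (illustrative only; ProxSave's actual resolver may differ):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// resolveUnderRoot maps an absolute system path (e.g. /etc/hosts) under an
// alternate root prefix (e.g. /mnt/snapshot-root/etc/hosts). Hypothetical
// helper for illustration; not ProxSave's real implementation.
func resolveUnderRoot(prefix, path string) string {
	prefix = strings.TrimSpace(prefix)
	if prefix == "" || prefix == "/" {
		return path // no prefix configured: read the live filesystem
	}
	return filepath.Join(prefix, strings.TrimPrefix(path, "/"))
}

func main() {
	prefix := os.Getenv("SYSTEM_ROOT_PREFIX") // e.g. /mnt/snapshot-root
	for _, p := range []string{"/etc", "/var", "/root", "/home"} {
		fmt.Printf("%s -> %s\n", p, resolveUnderRoot(prefix, p))
	}
}
```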
--- diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index d35414b..92f057c 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -2470,24 +2470,24 @@ A: Yes, in two ways: CLOUD_REMOTE_PATH=/pbs-backups/server1 ``` - - During `--decrypt` or `--restore` (CLI o TUI), ProxSave leggerà la stessa - combinazione `CLOUD_REMOTE` / `CLOUD_REMOTE_PATH` e mostrerà una voce: + - During `--decrypt` or `--restore` (CLI or TUI), ProxSave will read the same + `CLOUD_REMOTE` / `CLOUD_REMOTE_PATH` combination and show an entry: - `Cloud backups (rclone)` - - Selezionandola, il tool: - - elenca i bundle `.bundle.tar` sul remote con `rclone lsf`; - - legge i metadata/manifest tramite `rclone cat` (senza scaricare tutto); - - quando scegli un backup, lo scarica in `/tmp/proxsave` e procede al decrypt/restore. + - When selected, the tool: + - lists `.bundle.tar` bundles on the remote with `rclone lsf`; + - reads metadata/manifest via `rclone cat` (without downloading everything); + - when you pick a backup, downloads it to `/tmp/proxsave` and proceeds with decrypt/restore. -2. **Da mount locale rclone (solo restore)** - Se preferisci montare il backend rclone come filesystem locale: +2. **From a local rclone mount (restore-only)** + If you prefer to mount the rclone backend as a local filesystem: ```bash # Mount cloud storage locally rclone mount remote:bucket /mnt/cloud & # Configure in backup.env (restore-only scenario) - CLOUD_ENABLED=false # cloud upload disabilitato - # Usa BACKUP_PATH / SECONDARY_PATH oppure naviga il mount direttamente + CLOUD_ENABLED=false # cloud upload disabled + # Use BACKUP_PATH / SECONDARY_PATH or browse the mount directly ``` In questo caso puoi: diff --git a/internal/logging/bootstrap.go b/internal/logging/bootstrap.go index f1baa96..49dc83a 100644 --- a/internal/logging/bootstrap.go +++ b/internal/logging/bootstrap.go @@ -15,8 +15,8 @@ type bootstrapEntry struct { raw bool } -// BootstrapLogger accumula i log generati prima che il logger principale sia -// inizializzato, così da poterli riversare nel log finale. +// BootstrapLogger accumulates logs generated before the main logger is initialized, +// so they can be flushed into the final log. type BootstrapLogger struct { mu sync.Mutex entries []bootstrapEntry @@ -25,35 +25,35 @@ type BootstrapLogger struct { mirror *Logger } -// NewBootstrapLogger crea un nuovo bootstrap logger con livello INFO di default. +// NewBootstrapLogger creates a new bootstrap logger with INFO level by default. func NewBootstrapLogger() *BootstrapLogger { return &BootstrapLogger{ minLevel: types.LogLevelInfo, } } -// SetLevel aggiorna il livello minimo che verrà usato al flush. +// SetLevel updates the minimum level used during Flush. func (b *BootstrapLogger) SetLevel(level types.LogLevel) { b.mu.Lock() defer b.mu.Unlock() b.minLevel = level } -// Println registra una riga “raw” (usata per banner/testo senza header). +// Println records a raw line (used for banners/text without a header). func (b *BootstrapLogger) Println(message string) { fmt.Println(message) b.mirrorLog(types.LogLevelInfo, message) b.recordRaw(message) } -// Debug registra un messaggio di debug senza stamparlo a console. +// Debug records a debug message without printing it to the console. func (b *BootstrapLogger) Debug(format string, args ...interface{}) { msg := fmt.Sprintf(format, args...) b.mirrorLog(types.LogLevelDebug, msg) b.record(types.LogLevelDebug, msg) } -// Printf registra una riga formattata come raw. +// Printf records a formatted line as raw. 
func (b *BootstrapLogger) Printf(format string, args ...interface{}) { msg := fmt.Sprintf(format, args...) fmt.Println(msg) @@ -69,7 +69,7 @@ func (b *BootstrapLogger) Info(format string, args ...interface{}) { b.record(types.LogLevelInfo, msg) } -// Warning registra un messaggio di warning precoce (stampato su stderr). +// Warning records an early warning message (printed to stderr). func (b *BootstrapLogger) Warning(format string, args ...interface{}) { msg := fmt.Sprintf(format, args...) if !strings.HasSuffix(msg, "\n") { @@ -81,7 +81,7 @@ func (b *BootstrapLogger) Warning(format string, args ...interface{}) { b.record(types.LogLevelWarning, msg) } -// Error registra un messaggio di errore precoce (stderr). +// Error records an early error message (stderr). func (b *BootstrapLogger) Error(format string, args ...interface{}) { msg := fmt.Sprintf(format, args...) if !strings.HasSuffix(msg, "\n") { @@ -112,7 +112,7 @@ func (b *BootstrapLogger) recordRaw(message string) { }) } -// Flush riversa le entry accumulate nel logger principale (solo la prima volta). +// Flush flushes accumulated entries into the main logger (only the first time). func (b *BootstrapLogger) Flush(logger *Logger) { b.mu.Lock() defer b.mu.Unlock() diff --git a/internal/logging/logger.go b/internal/logging/logger.go index c29c62b..5f000e2 100644 --- a/internal/logging/logger.go +++ b/internal/logging/logger.go @@ -10,20 +10,20 @@ import ( "github.com/tis24dev/proxsave/internal/types" ) -// Logger gestisce il logging dell'applicazione +// Logger handles application logging. type Logger struct { mu sync.Mutex level types.LogLevel useColor bool output io.Writer timeFormat string - logFile *os.File // File di log (opzionale) + logFile *os.File // Log file (optional) warningCount int64 errorCount int64 exitFunc func(int) } -// New crea un nuovo logger +// New creates a new logger. func New(level types.LogLevel, useColor bool) *Logger { return &Logger{ level: level, @@ -34,7 +34,7 @@ func New(level types.LogLevel, useColor bool) *Logger { } } -// SetOutput imposta l'output writer del logger +// SetOutput sets the logger output writer. func (l *Logger) SetOutput(w io.Writer) { l.mu.Lock() defer l.mu.Unlock() @@ -45,7 +45,7 @@ func (l *Logger) SetOutput(w io.Writer) { l.output = w } -// SetLevel imposta il livello di logging +// SetLevel sets the logging level. func (l *Logger) SetLevel(level types.LogLevel) { l.mu.Lock() defer l.mu.Unlock() @@ -64,17 +64,17 @@ func (l *Logger) SetExitFunc(fn func(int)) { l.exitFunc = fn } -// OpenLogFile apre un file di log e inizia la scrittura real-time +// OpenLogFile opens a log file and starts real-time writing. func (l *Logger) OpenLogFile(logPath string) error { l.mu.Lock() defer l.mu.Unlock() - // Se c'è già un file aperto, chiudilo prima + // If a log file is already open, close it first. if l.logFile != nil { l.logFile.Close() } - // Crea il file di log (O_CREATE|O_WRONLY|O_APPEND) - // O_SYNC forza la scrittura immediata su disco (real-time, nessun buffer) + // Create the log file (O_CREATE|O_WRONLY|O_APPEND). + // O_SYNC forces immediate writes to disk (real-time, no buffering). file, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0600) if err != nil { return fmt.Errorf("failed to open log file %s: %w", logPath, err) @@ -84,7 +84,7 @@ func (l *Logger) OpenLogFile(logPath string) error { return nil } -// CloseLogFile chiude il file di log (da chiamare dopo le notifiche) +// CloseLogFile closes the log file (call after notifications). 
func (l *Logger) CloseLogFile() error { l.mu.Lock() defer l.mu.Unlock() @@ -97,7 +97,7 @@ func (l *Logger) CloseLogFile() error { return err } -// GetLogFilePath restituisce il path del file di log attualmente aperto (o "" se nessuno) +// GetLogFilePath returns the path of the currently open log file (or "" if none). func (l *Logger) GetLogFilePath() string { l.mu.Lock() defer l.mu.Unlock() @@ -112,14 +112,14 @@ func (l *Logger) UsesColor() bool { return l.useColor } -// GetLevel restituisce il livello corrente +// GetLevel returns the current log level. func (l *Logger) GetLevel() types.LogLevel { l.mu.Lock() defer l.mu.Unlock() return l.level } -// log è il metodo interno per scrivere i log +// log is the internal method used to write logs. func (l *Logger) log(level types.LogLevel, format string, args ...interface{}) { l.logWithLabel(level, "", "", format, args...) } @@ -169,7 +169,7 @@ func (l *Logger) logWithLabel(level types.LogLevel, label string, colorOverride } } - // Formato per stdout (con colori se abilitati) + // Format for stdout (with colors if enabled). outputStdout := fmt.Sprintf("[%s] %s%-8s%s %s\n", timestamp, colorCode, @@ -178,17 +178,17 @@ func (l *Logger) logWithLabel(level types.LogLevel, label string, colorOverride message, ) - // Formato per file (senza colori) + // Format for file (without colors). outputFile := fmt.Sprintf("[%s] %-8s %s\n", timestamp, levelStr, message, ) - // Scrivi su stdout con colori + // Write to stdout with colors. fmt.Fprint(l.output, outputStdout) - // Se c'è un file di log, scrivi anche lì (senza colori) + // If a log file is open, write there too (without colors). if l.logFile != nil { fmt.Fprint(l.logFile, outputFile) } @@ -208,7 +208,7 @@ func (l *Logger) HasErrors() bool { return l.errorCount > 0 } -// Debug scrive un log di debug +// Debug writes a debug log. func (l *Logger) Debug(format string, args ...interface{}) { l.log(types.LogLevelDebug, format, args...) } @@ -254,17 +254,17 @@ func (l *Logger) Skip(format string, args ...interface{}) { l.logWithLabel(types.LogLevelInfo, "SKIP", colorOverride, format, args...) } -// Warning scrive un log di warning +// Warning writes a warning log. func (l *Logger) Warning(format string, args ...interface{}) { l.log(types.LogLevelWarning, format, args...) } -// Error scrive un log di errore +// Error writes an error log. func (l *Logger) Error(format string, args ...interface{}) { l.log(types.LogLevelError, format, args...) } -// Critical scrive un log critico +// Critical writes a critical log. func (l *Logger) Critical(format string, args ...interface{}) { l.log(types.LogLevelCritical, format, args...) } @@ -305,19 +305,19 @@ func init() { defaultLogger = New(types.LogLevelInfo, true) } -// SetDefaultLogger imposta il logger di default +// SetDefaultLogger sets the default logger. func SetDefaultLogger(logger *Logger) { defaultLogger = logger } -// GetDefaultLogger restituisce il logger di default +// GetDefaultLogger returns the default logger. func GetDefaultLogger() *Logger { return defaultLogger } // Package-level convenience functions -// Debug scrive un log di debug usando il logger di default +// Debug writes a debug log using the default logger. func Debug(format string, args ...interface{}) { defaultLogger.Debug(format, args...) } @@ -327,27 +327,27 @@ func Info(format string, args ...interface{}) { defaultLogger.Info(format, args...) } -// Step scrive un log STEP usando il logger di default +// Step writes a STEP log using the default logger. 
func Step(format string, args ...interface{}) { defaultLogger.Step(format, args...) } -// Skip scrive un log SKIP usando il logger di default +// Skip writes a SKIP log using the default logger. func Skip(format string, args ...interface{}) { defaultLogger.Skip(format, args...) } -// Warning scrive un log di warning usando il logger di default +// Warning writes a warning log using the default logger. func Warning(format string, args ...interface{}) { defaultLogger.Warning(format, args...) } -// Error scrive un log di errore usando il logger di default +// Error writes an error log using the default logger. func Error(format string, args ...interface{}) { defaultLogger.Error(format, args...) } -// Critical scrive un log critico usando il logger di default +// Critical writes a critical log using the default logger. func Critical(format string, args ...interface{}) { defaultLogger.Critical(format, args...) } diff --git a/internal/notify/telegram_registration.go b/internal/notify/telegram_registration.go index 9f19a4e..69a4fd3 100644 --- a/internal/notify/telegram_registration.go +++ b/internal/notify/telegram_registration.go @@ -29,14 +29,14 @@ func truncateTelegramRegistrationBody(body []byte, max int) string { return text } -// TelegramRegistrationStatus rappresenta l'esito dell'handshake con il server Telegram centralizzato. +// TelegramRegistrationStatus represents the result of the handshake with the centralized Telegram server. type TelegramRegistrationStatus struct { Code int Message string Error error } -// CheckTelegramRegistration verifica lo stato della registrazione sul server centralizzato. +// CheckTelegramRegistration checks the registration status on the centralized server. func CheckTelegramRegistration(ctx context.Context, serverAPIHost, serverID string, logger *logging.Logger) TelegramRegistrationStatus { logTelegramRegistrationDebug(logger, "Telegram registration: start (serverAPIHost=%q serverID=%q len=%d)", serverAPIHost, serverID, len(serverID)) @@ -44,7 +44,7 @@ func CheckTelegramRegistration(ctx context.Context, serverAPIHost, serverID stri logTelegramRegistrationDebug(logger, "Telegram registration: missing serverID (empty string)") return TelegramRegistrationStatus{ Code: 0, - Message: "SERVER_ID non disponibile", + Message: "SERVER_ID not available", Error: fmt.Errorf("server ID missing"), } } diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index 9a4c9c2..8798edb 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=1706596 +pid=1850710 host=pve -time=2026-01-20T23:21:05+01:00 +time=2026-01-21T03:56:03+01:00 diff --git a/internal/orchestrator/directory_recreation_test.go b/internal/orchestrator/directory_recreation_test.go index 198b15a..7b8a827 100644 --- a/internal/orchestrator/directory_recreation_test.go +++ b/internal/orchestrator/directory_recreation_test.go @@ -443,14 +443,14 @@ func TestIsLikelyZFSMountPointNoMatch(t *testing.T) { } } -// Test: isLikelyZFSMountPoint con path contenente "datastore" +// Test: isLikelyZFSMountPoint with a path containing "datastore" func TestIsLikelyZFSMountPointDatastorePath(t *testing.T) { logger := newDirTestLogger() cachePath, restore := overridePath(t, &zpoolCachePath, "zpool.cache") defer restore() writeFile(t, cachePath, "cache") - // Path con "datastore" nel nome dovrebbe matchare + // A path with "datastore" in the name should match if !isLikelyZFSMountPoint("/var/lib/datastore", logger) { t.Fatalf("expected true for path 
containing 'datastore'")
 	}
@@ -459,10 +459,10 @@ func TestIsLikelyZFSMountPointDatastorePath(t *testing.T) {
 	}
 }
 
-// Test: createPVEStorageStructure ritorna errore se base directory non creabile
+// Test: createPVEStorageStructure returns an error if the base directory can't be created
 func TestCreatePVEStorageStructureBaseError(t *testing.T) {
 	logger := newDirTestLogger()
-	// Path con carattere nullo non è valido
+	// A path under /dev/null can't be created (/dev/null is not a directory)
 	invalidPath := "/dev/null/cannot/create/here"
 	err := createPVEStorageStructure(invalidPath, "dir", logger)
 	if err == nil {
@@ -470,10 +470,10 @@ func TestCreatePVEStorageStructureBaseError(t *testing.T) {
 	}
 }
 
-// Test: createPBSDatastoreStructure ritorna errore se base directory non creabile
+// Test: createPBSDatastoreStructure returns an error if the base directory can't be created
 func TestCreatePBSDatastoreStructureBaseError(t *testing.T) {
 	logger := newDirTestLogger()
-	// Override zpoolCachePath per evitare ZFS detection
+	// Override zpoolCachePath to avoid ZFS detection
 	_, cacheRestore := overridePath(t, &zpoolCachePath, "zpool.cache")
 	defer cacheRestore()
 
@@ -484,10 +484,10 @@ func TestCreatePBSDatastoreStructureBaseError(t *testing.T) {
 	}
 }
 
-// Test: RecreateDirectoriesFromConfig propaga errore da RecreateStorageDirectories
+// Test: RecreateDirectoriesFromConfig propagates an error from RecreateStorageDirectories
 func TestRecreateDirectoriesFromConfigPVEStatError(t *testing.T) {
 	logger := newDirTestLogger()
-	// Creiamo una directory e la rendiamo non leggibile per causare errore stat
+	// Create a directory and make it unreadable to trigger a stat error
 	tmpDir := t.TempDir()
 	cfgDir := filepath.Join(tmpDir, "noperm")
 	if err := os.MkdirAll(cfgDir, 0o000); err != nil {
@@ -501,7 +501,7 @@ func TestRecreateDirectoriesFromConfigPVEStatError(t *testing.T) {
 	defer func() { storageCfgPath = prev }()
 
 	err := RecreateDirectoriesFromConfig(SystemTypePVE, logger)
-	// Se siamo root, il test non funziona come previsto
+	// If we're root, the test won't behave as expected
 	if os.Getuid() == 0 {
 		t.Skip("test requires non-root user")
 	}
@@ -510,10 +510,10 @@ func TestRecreateDirectoriesFromConfigPVEStatError(t *testing.T) {
 	}
 }
 
-// Test: RecreateDirectoriesFromConfig propaga errore da RecreateDatastoreDirectories
+// Test: RecreateDirectoriesFromConfig propagates an error from RecreateDatastoreDirectories
 func TestRecreateDirectoriesFromConfigPBSStatError(t *testing.T) {
 	logger := newDirTestLogger()
-	// Creiamo una directory e la rendiamo non leggibile
+	// Create a directory and make it unreadable
 	tmpDir := t.TempDir()
 	cfgDir := filepath.Join(tmpDir, "noperm")
 	if err := os.MkdirAll(cfgDir, 0o000); err != nil {
@@ -527,7 +527,7 @@ func TestRecreateDirectoriesFromConfigPBSStatError(t *testing.T) {
 	defer func() { datastoreCfgPath = prev }()
 
 	err := RecreateDirectoriesFromConfig(SystemTypePBS, logger)
-	// Se siamo root, il test non funziona come previsto
+	// If we're root, the test won't behave as expected
 	if os.Getuid() == 0 {
 		t.Skip("test requires non-root user")
 	}
diff --git a/internal/orchestrator/extensions.go b/internal/orchestrator/extensions.go
index 41a12a3..0b2253c 100644
--- a/internal/orchestrator/extensions.go
+++ b/internal/orchestrator/extensions.go
@@ -175,9 +175,9 @@ func (o *Orchestrator) DispatchEarlyErrorNotification(ctx context.Context, early
 	return stats
 }
 
-// dispatchNotificationsAndLogs esegue la fase di notifiche e gestione file di log.
-// Viene usata sia nel percorso di successo che in quello di errore, così che -// le notifiche vengano comunque inviate e il log venga sempre chiuso/ruotato. +// dispatchNotificationsAndLogs runs the notification phase and log-file management. +// It is used both on success and on error paths, so notifications still go out +// and the log is always closed/rotated. func (o *Orchestrator) dispatchNotificationsAndLogs(ctx context.Context, stats *BackupStats) { if o == nil || o.logger == nil { return diff --git a/internal/orchestrator/restore_filesystem.go b/internal/orchestrator/restore_filesystem.go index 8d7f04c..7de0557 100644 --- a/internal/orchestrator/restore_filesystem.go +++ b/internal/orchestrator/restore_filesystem.go @@ -67,7 +67,7 @@ func SmartMergeFstab(ctx context.Context, logger *logging.Logger, reader *bufio. } defaultYes := analysis.RootComparable && analysis.RootMatch && (!analysis.SwapComparable || analysis.SwapMatch) - confirmMsg := "Vuoi aggiungere i mount mancanti (NFS/CIFS e dati su UUID/LABEL verificati)?" + confirmMsg := "Do you want to add the missing mounts (NFS/CIFS and data mounts with verified UUID/LABEL)?" confirmed, err := confirmLocal(ctx, reader, confirmMsg, defaultYes) if err != nil { return err @@ -304,13 +304,13 @@ func isSafeMountCandidate(e FstabEntry) bool { func printFstabAnalysis(logger *logging.Logger, res FstabAnalysisResult) { fmt.Println() - logger.Info("Analisi fstab:") + logger.Info("fstab analysis:") // Root Status if !res.RootComparable { - logger.Warning("! Root filesystem: non determinabile (entry mancante in current/backup fstab)") + logger.Warning("! Root filesystem: undetermined (missing entry in current/backup fstab)") } else if res.RootMatch { - logger.Info("✓ Root filesystem: compatibile (UUID kept from system)") + logger.Info("✓ Root filesystem: compatible (UUID kept from system)") } else { // ANSI Yellow/Red might be nice, but stick to standard logger for now. logger.Warning("! Root UUID mismatch: Backup is from a different machine (System info preserved)") @@ -319,9 +319,9 @@ func printFstabAnalysis(logger *logging.Logger, res FstabAnalysisResult) { // Swap Status if !res.SwapComparable { - logger.Info("Swap: non determinabile (entry mancante in current/backup fstab)") + logger.Info("Swap: undetermined (missing entry in current/backup fstab)") } else if res.SwapMatch { - logger.Info("✓ Swap: compatibile") + logger.Info("✓ Swap: compatible") } else { logger.Warning("! Swap mismatch: keeping current swap configuration") logger.Debug(" Details: Current=%s, Backup=%s", res.SwapDeviceCurrent, res.SwapDeviceBackup) @@ -329,20 +329,20 @@ func printFstabAnalysis(logger *logging.Logger, res FstabAnalysisResult) { // New Entries if len(res.ProposedMounts) > 0 { - logger.Info("+ %d mount(s) sicuri trovati nel backup ma non nel sistema attuale:", len(res.ProposedMounts)) + logger.Info("+ %d safe mount(s) found in the backup but missing from the current system:", len(res.ProposedMounts)) for _, m := range res.ProposedMounts { logger.Info(" %s -> %s (%s)", m.Device, m.MountPoint, m.Type) } } else { - logger.Info("✓ Nessun mount aggiuntivo trovato nel backup.") + logger.Info("✓ No additional mounts found in the backup.") } if len(res.SkippedMounts) > 0 { - logger.Warning("! %d mount(s) trovati ma NON proposti automaticamente (potenzialmente rischiosi):", len(res.SkippedMounts)) + logger.Warning("! 
%d mount(s) found but NOT auto-proposed (potentially risky):", len(res.SkippedMounts)) for _, m := range res.SkippedMounts { logger.Warning(" %s -> %s (%s)", m.Device, m.MountPoint, m.Type) } - logger.Info(" Suggerimento: verifica dischi/UUID e opzioni (nofail/_netdev) prima di aggiungerli a /etc/fstab.") + logger.Info(" Hint: verify disks/UUIDs and options (nofail/_netdev) before adding them to /etc/fstab.") } fmt.Println() } diff --git a/internal/orchestrator/restore_tui.go b/internal/orchestrator/restore_tui.go index 46a877a..cad3e57 100644 --- a/internal/orchestrator/restore_tui.go +++ b/internal/orchestrator/restore_tui.go @@ -1896,23 +1896,23 @@ func runFullRestoreTUI(ctx context.Context, candidate *decryptCandidate, prepare logger.Info("No new safe mounts found to restore. Keeping current fstab.") } else { var msg strings.Builder - msg.WriteString("ProxSave ha trovato mount mancanti in /etc/fstab.\n\n") + msg.WriteString("ProxSave found missing mounts in /etc/fstab.\n\n") if analysis.RootComparable && !analysis.RootMatch { - msg.WriteString("⚠ Root UUID mismatch: il backup sembra provenire da una macchina diversa.\n") + msg.WriteString("⚠ Root UUID mismatch: the backup appears to come from a different machine.\n") } if analysis.SwapComparable && !analysis.SwapMatch { - msg.WriteString("⚠ Swap mismatch: verrà mantenuta la configurazione swap attuale.\n") + msg.WriteString("⚠ Swap mismatch: the current swap configuration will be kept.\n") } - msg.WriteString("\nMount proposti (sicuri):\n") + msg.WriteString("\nProposed mounts (safe):\n") for _, m := range analysis.ProposedMounts { fmt.Fprintf(&msg, " - %s -> %s (%s)\n", m.Device, m.MountPoint, m.Type) } if len(analysis.SkippedMounts) > 0 { - msg.WriteString("\nMount trovati ma non proposti automaticamente:\n") + msg.WriteString("\nMounts found but not auto-proposed:\n") for _, m := range analysis.SkippedMounts { fmt.Fprintf(&msg, " - %s -> %s (%s)\n", m.Device, m.MountPoint, m.Type) } - msg.WriteString("\nSuggerimento: verifica dischi/UUID e opzioni (nofail/_netdev) prima di aggiungerli.\n") + msg.WriteString("\nHint: verify disks/UUIDs and options (nofail/_netdev) before adding them.\n") } apply, perr := promptYesNoTUIFunc("Smart fstab merge", configPath, buildSig, msg.String(), "Apply", "Skip") diff --git a/internal/orchestrator/storage_adapter.go b/internal/orchestrator/storage_adapter.go index ab1dedf..6e473bf 100644 --- a/internal/orchestrator/storage_adapter.go +++ b/internal/orchestrator/storage_adapter.go @@ -184,8 +184,8 @@ func (s *StorageAdapter) logCurrentBackupCount() { return } - // Il backend cloud logga già il conteggio corrente durante ApplyRetention - // riutilizzando la stessa lista; evitiamo una seconda chiamata rclone lsl. + // The cloud backend already logs the current count during ApplyRetention + // by reusing the same list; avoid a second rclone lsl call. if s.backend.Location() == storage.LocationCloud { return } diff --git a/internal/storage/retention_normalize.go b/internal/storage/retention_normalize.go index 5583123..fa86cf3 100644 --- a/internal/storage/retention_normalize.go +++ b/internal/storage/retention_normalize.go @@ -4,10 +4,10 @@ import ( "github.com/tis24dev/proxsave/internal/logging" ) -// NormalizeGFSRetentionConfig applica le correzioni necessarie alla configurazione -// GFS prima di eseguire la retention. Attualmente: -// - garantisce che il tier DAILY abbia almeno valore 1 (minimo accettato) -// se la policy è gfs e RETENTION_DAILY è <= 0. 
+// NormalizeGFSRetentionConfig applies the required adjustments to the GFS configuration +// before running retention. Currently: +// - ensures the DAILY tier is at least 1 (minimum accepted value) +// when the policy is gfs and RETENTION_DAILY is <= 0. // - emits a log line to document the adjustment. func NormalizeGFSRetentionConfig(logger *logging.Logger, backendName string, cfg RetentionConfig) RetentionConfig { if cfg.Policy != "gfs" { diff --git a/internal/types/common.go b/internal/types/common.go index e8e4b6c..97159b8 100644 --- a/internal/types/common.go +++ b/internal/types/common.go @@ -2,7 +2,7 @@ package types import "time" -// ProxmoxType rappresenta il tipo di ambiente Proxmox +// ProxmoxType represents the type of Proxmox environment. type ProxmoxType string const ( @@ -12,67 +12,67 @@ const ( // ProxmoxBS - Proxmox Backup Server ProxmoxBS ProxmoxType = "pbs" - // ProxmoxUnknown - Tipo sconosciuto o non rilevato + // ProxmoxUnknown - Unknown or undetected type ProxmoxUnknown ProxmoxType = "unknown" ) -// String restituisce la rappresentazione stringa del tipo Proxmox +// String returns the string representation of the Proxmox type. func (p ProxmoxType) String() string { return string(p) } -// CompressionType rappresenta il tipo di compressione +// CompressionType represents the compression type. type CompressionType string const ( - // CompressionGzip - Compressione gzip + // CompressionGzip - gzip compression CompressionGzip CompressionType = "gz" - // CompressionPigz - Compressione gzip parallela (pigz) + // CompressionPigz - parallel gzip compression (pigz) CompressionPigz CompressionType = "pigz" - // CompressionBzip2 - Compressione bzip2 + // CompressionBzip2 - bzip2 compression CompressionBzip2 CompressionType = "bz2" - // CompressionXZ - Compressione xz (LZMA) + // CompressionXZ - xz compression (LZMA) CompressionXZ CompressionType = "xz" - // CompressionLZMA - Compressione lzma classica + // CompressionLZMA - classic lzma compression CompressionLZMA CompressionType = "lzma" - // CompressionZstd - Compressione zstd + // CompressionZstd - zstd compression CompressionZstd CompressionType = "zst" - // CompressionNone - Nessuna compressione + // CompressionNone - no compression CompressionNone CompressionType = "none" ) -// String restituisce la rappresentazione stringa del tipo di compressione +// String returns the string representation of the compression type. func (c CompressionType) String() string { return string(c) } -// BackupInfo contiene informazioni su un backup +// BackupInfo contains information about a backup. type BackupInfo struct { - // Timestamp del backup + // Backup timestamp Timestamp time.Time - // Nome del file di backup + // Backup file name Filename string - // Dimensione del file in bytes + // File size in bytes Size int64 - // Checksum SHA256 + // SHA256 checksum Checksum string - // Tipo di compressione usata + // Compression type used Compression CompressionType - // Path completo del file + // Full file path Path string - // Tipo di ambiente Proxmox + // Proxmox environment type ProxmoxType ProxmoxType } @@ -101,49 +101,49 @@ type BackupMetadata struct { Version string } -// StorageLocation rappresenta una destinazione di storage +// StorageLocation represents a storage destination. 
type StorageLocation string const ( - // StorageLocal - Storage locale + // StorageLocal - Local storage StorageLocal StorageLocation = "local" - // StorageSecondary - Storage secondario + // StorageSecondary - Secondary storage StorageSecondary StorageLocation = "secondary" - // StorageCloud - Storage cloud (rclone) + // StorageCloud - Cloud storage (rclone) StorageCloud StorageLocation = "cloud" ) -// String restituisce la rappresentazione stringa della location +// String returns the string representation of the location. func (s StorageLocation) String() string { return string(s) } -// LogLevel rappresenta il livello di logging +// LogLevel represents the logging level. type LogLevel int const ( - // LogLevelDebug - Log di debug (massimo dettaglio) + // LogLevelDebug - Debug logs (maximum detail) LogLevelDebug LogLevel = 5 - // LogLevelInfo - Informazioni generali + // LogLevelInfo - General information LogLevelInfo LogLevel = 4 - // LogLevelWarning - Avvisi + // LogLevelWarning - Warnings LogLevelWarning LogLevel = 3 - // LogLevelError - Errori + // LogLevelError - Errors LogLevelError LogLevel = 2 - // LogLevelCritical - Errori critici + // LogLevelCritical - Critical errors LogLevelCritical LogLevel = 1 - // LogLevelNone - Nessun log + // LogLevelNone - No logs LogLevelNone LogLevel = 0 ) -// String restituisce la rappresentazione stringa del log level +// String returns the string representation of the log level. func (l LogLevel) String() string { switch l { case LogLevelDebug: diff --git a/pkg/utils/files.go b/pkg/utils/files.go index 27c8b50..40f1ed7 100644 --- a/pkg/utils/files.go +++ b/pkg/utils/files.go @@ -8,7 +8,7 @@ import ( "path/filepath" ) -// FileExists verifica se un file esiste +// FileExists checks whether a file exists. func FileExists(path string) bool { info, err := os.Stat(path) if err != nil { @@ -17,7 +17,7 @@ func FileExists(path string) bool { return !info.IsDir() } -// DirExists verifica se una directory esiste +// DirExists checks whether a directory exists. func DirExists(path string) bool { info, err := os.Stat(path) if err != nil { @@ -26,7 +26,7 @@ func DirExists(path string) bool { return info.IsDir() } -// EnsureDir crea una directory se non esiste +// EnsureDir creates a directory if it doesn't exist. func EnsureDir(path string) error { if DirExists(path) { return nil @@ -34,7 +34,7 @@ func EnsureDir(path string) error { return os.MkdirAll(path, 0755) } -// ComputeSHA256 calcola il checksum SHA256 di un file +// ComputeSHA256 computes the SHA256 checksum of a file. func ComputeSHA256(filePath string) (string, error) { file, err := os.Open(filePath) if err != nil { @@ -50,7 +50,7 @@ func ComputeSHA256(filePath string) (string, error) { return fmt.Sprintf("%x", hash.Sum(nil)), nil } -// GetFileSize restituisce la dimensione di un file in bytes +// GetFileSize returns the file size in bytes. func GetFileSize(filePath string) (int64, error) { info, err := os.Stat(filePath) if err != nil { @@ -59,7 +59,7 @@ func GetFileSize(filePath string) (int64, error) { return info.Size(), nil } -// AbsPath restituisce il path assoluto di un file +// AbsPath returns the absolute path of a file. func AbsPath(path string) (string, error) { absPath, err := filepath.Abs(path) if err != nil { @@ -68,7 +68,7 @@ func AbsPath(path string) (string, error) { return absPath, nil } -// IsAbsPath verifica se un path è assoluto +// IsAbsPath checks whether a path is absolute. 
func IsAbsPath(path string) bool { return filepath.IsAbs(path) } diff --git a/pkg/utils/strings.go b/pkg/utils/strings.go index 09509f9..93753c7 100644 --- a/pkg/utils/strings.go +++ b/pkg/utils/strings.go @@ -7,7 +7,7 @@ import ( "strings" ) -// FormatBytes converte bytes in formato human-readable (KB, MB, GB, ecc.) +// FormatBytes converts bytes to a human-readable format (KB, MB, GB, etc.). func FormatBytes(bytes int64) string { const unit = 1024 if bytes < unit { @@ -21,13 +21,13 @@ func FormatBytes(bytes int64) string { return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp]) } -// ParseBool converte una stringa in booleano (supporta vari formati) +// ParseBool converts a string to a boolean (supports multiple formats). func ParseBool(s string) bool { s = strings.ToLower(strings.TrimSpace(s)) return s == "true" || s == "1" || s == "yes" || s == "on" || s == "enabled" } -// TrimQuotes rimuove le virgolette da una stringa +// TrimQuotes removes surrounding quotes from a string. func TrimQuotes(s string) string { s = strings.TrimSpace(s) if len(s) >= 2 { @@ -38,8 +38,8 @@ func TrimQuotes(s string) string { return s } -// SplitKeyValue divide una stringa "key=value" in key e value -// Supporta anche commenti inline: KEY="value" # comment +// SplitKeyValue splits a "key=value" string into key and value. +// Supports inline comments too: KEY="value" # comment func SplitKeyValue(line string) (string, string, bool) { parts := strings.SplitN(line, "=", 2) if len(parts) != 2 { @@ -70,7 +70,7 @@ func SplitKeyValue(line string) (string, string, bool) { return key, value, true } -// IsComment verifica se una linea è un commento (inizia con #) +// IsComment checks whether a line is a comment (starts with #). func IsComment(line string) bool { trimmed := strings.TrimSpace(line) return strings.HasPrefix(trimmed, "#") || trimmed == "" From 4895ac6142fb54bc1f5ec996d9d858cf39a867f0 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Wed, 21 Jan 2026 21:35:43 +0100 Subject: [PATCH 02/10] Improve network rollback handling and reporting Refactors network rollback logic to keep rollback armed if not committed, and updates error reporting to indicate whether rollback is still armed or has executed. Also enhances logging in the restore workflow to clarify the rollback state and current IP address. This provides clearer diagnostics and safer handling of uncommitted network changes. 
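The behavior rests on a marker-file handshake: arming writes a marker and schedules the rollback script; committing removes the marker; when the timer fires, the script proceeds only if the marker still exists. A condensed sketch of that protocol (helper names are illustrative, not the exact symbols in network_apply.go):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// Illustrative helpers only; the real logic lives in network_apply.go and a
// generated shell script.

func arm(marker string) error {
	// Arming writes the marker; the rollback script checks it before acting.
	return os.WriteFile(marker, []byte("armed\n"), 0o600)
}

func commit(marker string) error {
	// Disarming removes the marker first, so a concurrently fired rollback
	// script exits early instead of reverting a committed configuration.
	if err := os.Remove(marker); err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}

// rollbackArmed mirrors the reporting added by this patch: a missing marker
// means the rollback already ran or was disarmed.
func rollbackArmed(marker string) bool {
	_, err := os.Stat(marker)
	return err == nil
}

func main() {
	marker := filepath.Join(os.TempDir(), "proxsave-rollback.marker")
	_ = arm(marker)
	fmt.Println("armed:", rollbackArmed(marker)) // true
	_ = commit(marker)
	fmt.Println("armed:", rollbackArmed(marker)) // false
}
```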
--- internal/orchestrator/.backup.lock | 4 +-- internal/orchestrator/network_apply.go | 39 ++++++++++++-------------- internal/orchestrator/restore.go | 12 ++++++-- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index 8798edb..80a7ad6 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=1850710 +pid=1933994 host=pve -time=2026-01-21T03:56:03+01:00 +time=2026-01-21T06:33:18+01:00 diff --git a/internal/orchestrator/network_apply.go b/internal/orchestrator/network_apply.go index e9c073a..9391b4e 100644 --- a/internal/orchestrator/network_apply.go +++ b/internal/orchestrator/network_apply.go @@ -22,6 +22,7 @@ var ErrNetworkApplyNotCommitted = errors.New("network configuration not committe type NetworkApplyNotCommittedError struct { RollbackLog string RestoredIP string + RollbackArmed bool } func (e *NetworkApplyNotCommittedError) Error() string { @@ -427,32 +428,25 @@ func applyNetworkWithRollbackCLI(ctx context.Context, reader *bufio.Reader, logg return nil } - // Timer window expired: run rollback now so the restore summary can report the final state. - if output, rbErr := restoreCmd.Run(ctx, "sh", handle.scriptPath); rbErr != nil { - if len(output) > 0 { - logger.Debug("Rollback script output: %s", strings.TrimSpace(string(output))) - } - return fmt.Errorf("network apply not committed; rollback failed (log: %s): %w", strings.TrimSpace(handle.logPath), rbErr) - } else if len(output) > 0 { - logger.Debug("Rollback script output: %s", strings.TrimSpace(string(output))) - } - disarmNetworkRollback(ctx, logger, handle) - + // Not committed: keep rollback ARMED. Do not disarm. + // The rollback script will run via systemd-run/nohup when the timer expires. restoredIP := "unknown" if strings.TrimSpace(iface) != "" { - deadline := time.Now().Add(5 * time.Second) - for time.Now().Before(deadline) { - ep, err := currentNetworkEndpoint(ctx, iface, 1*time.Second) - if err == nil && len(ep.Addresses) > 0 { - restoredIP = strings.Join(ep.Addresses, ", ") - break - } - time.Sleep(300 * time.Millisecond) + if ep, err := currentNetworkEndpoint(ctx, iface, 1*time.Second); err == nil && len(ep.Addresses) > 0 { + restoredIP = strings.Join(ep.Addresses, ", ") + } + } + rollbackArmed := true + if handle.markerPath != "" { + if _, statErr := restoreFS.Stat(handle.markerPath); statErr != nil { + // Marker missing => rollback likely already executed (or was manually removed). + rollbackArmed = false } } return &NetworkApplyNotCommittedError{ RollbackLog: strings.TrimSpace(handle.logPath), RestoredIP: strings.TrimSpace(restoredIP), + RollbackArmed: rollbackArmed, } } @@ -546,8 +540,11 @@ func disarmNetworkRollback(ctx context.Context, logger *logging.Logger, handle * } } if handle.unitName != "" && commandAvailable("systemctl") { - if output, err := restoreCmd.Run(ctx, "systemctl", "stop", handle.unitName); err != nil { - logger.Debug("Failed to stop rollback unit %s: %v (output: %s)", handle.unitName, err, strings.TrimSpace(string(output))) + // systemd-run --on-active creates both .timer and .service; stop both. 
+ for _, unit := range []string{handle.unitName + ".timer", handle.unitName + ".service"} { + if output, err := restoreCmd.Run(ctx, "systemctl", "stop", unit); err != nil { + logger.Debug("Failed to stop rollback unit %s: %v (output: %s)", unit, err, strings.TrimSpace(string(output))) + } } } } diff --git a/internal/orchestrator/restore.go b/internal/orchestrator/restore.go index 4a0e426..03500c0 100644 --- a/internal/orchestrator/restore.go +++ b/internal/orchestrator/restore.go @@ -455,15 +455,21 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging restoreHadWarnings = true if errors.Is(err, ErrNetworkApplyNotCommitted) { var notCommitted *NetworkApplyNotCommittedError - restoredIP := "unknown" + observedIP := "unknown" rollbackLog := "" + rollbackArmed := false if errors.As(err, ¬Committed) && notCommitted != nil { if strings.TrimSpace(notCommitted.RestoredIP) != "" { - restoredIP = strings.TrimSpace(notCommitted.RestoredIP) + observedIP = strings.TrimSpace(notCommitted.RestoredIP) } rollbackLog = strings.TrimSpace(notCommitted.RollbackLog) + rollbackArmed = notCommitted.RollbackArmed + } + if rollbackArmed { + logger.Warning("Network apply not committed; rollback is ARMED and will run automatically. Current IP: %s", observedIP) + } else { + logger.Warning("Network apply not committed; rollback has executed (or marker cleared). Current IP: %s", observedIP) } - logger.Warning("Network apply not committed and original settings restored. IP: %s", restoredIP) if rollbackLog != "" { logger.Info("Rollback log: %s", rollbackLog) } From 51384414706ed0e51afb8924ccaf126c4c2eb77b Mon Sep 17 00:00:00 2001 From: tis24dev Date: Wed, 21 Jan 2026 22:17:04 +0100 Subject: [PATCH 03/10] Improve network rollback handling and logging Enhances network rollback logic to better detect if rollback is already running and prevents late commits. Refactors rollback disarm to only stop the timer, adds detailed logging and state checks to the rollback script, and improves error handling for network reload commands. --- internal/orchestrator/.backup.lock | 4 +- internal/orchestrator/network_apply.go | 104 ++++++++++++++++++++----- 2 files changed, 87 insertions(+), 21 deletions(-) diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index 80a7ad6..f3217c3 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=1933994 +pid=1961097 host=pve -time=2026-01-21T06:33:18+01:00 +time=2026-01-21T07:21:16+01:00 diff --git a/internal/orchestrator/network_apply.go b/internal/orchestrator/network_apply.go index 9391b4e..5b197dd 100644 --- a/internal/orchestrator/network_apply.go +++ b/internal/orchestrator/network_apply.go @@ -420,9 +420,15 @@ func applyNetworkWithRollbackCLI(ctx context.Context, reader *bufio.Reader, logg committed, err := promptNetworkCommitWithCountdown(ctx, reader, logger, remaining) if err != nil { logger.Warning("Commit prompt error: %v", err) + logger.Info("Commit input lost; rollback remains ARMED and will proceed automatically.") + return buildNetworkApplyNotCommittedError(ctx, iface, handle) } logging.DebugStep(logger, "network safe apply (cli)", "User commit result: committed=%v", committed) if committed { + if rollbackAlreadyRunning(ctx, handle) { + logger.Warning("Commit received too late: rollback already running. 
Network configuration NOT committed.") + return buildNetworkApplyNotCommittedError(ctx, iface, handle) + } disarmNetworkRollback(ctx, logger, handle) logger.Info("Network configuration committed successfully.") return nil @@ -430,26 +436,52 @@ func applyNetworkWithRollbackCLI(ctx context.Context, reader *bufio.Reader, logg // Not committed: keep rollback ARMED. Do not disarm. // The rollback script will run via systemd-run/nohup when the timer expires. + return buildNetworkApplyNotCommittedError(ctx, iface, handle) +} + +func buildNetworkApplyNotCommittedError(ctx context.Context, iface string, handle *networkRollbackHandle) *NetworkApplyNotCommittedError { restoredIP := "unknown" if strings.TrimSpace(iface) != "" { if ep, err := currentNetworkEndpoint(ctx, iface, 1*time.Second); err == nil && len(ep.Addresses) > 0 { restoredIP = strings.Join(ep.Addresses, ", ") } } + rollbackArmed := true - if handle.markerPath != "" { + if handle == nil { + rollbackArmed = false + } else if strings.TrimSpace(handle.markerPath) != "" { if _, statErr := restoreFS.Stat(handle.markerPath); statErr != nil { // Marker missing => rollback likely already executed (or was manually removed). rollbackArmed = false } } + + rollbackLog := "" + if handle != nil { + rollbackLog = strings.TrimSpace(handle.logPath) + } return &NetworkApplyNotCommittedError{ - RollbackLog: strings.TrimSpace(handle.logPath), - RestoredIP: strings.TrimSpace(restoredIP), + RollbackLog: rollbackLog, + RestoredIP: strings.TrimSpace(restoredIP), RollbackArmed: rollbackArmed, } } +func rollbackAlreadyRunning(ctx context.Context, handle *networkRollbackHandle) bool { + if handle == nil || strings.TrimSpace(handle.unitName) == "" || !commandAvailable("systemctl") { + return false + } + checkCtx, cancel := context.WithTimeout(ctx, 2*time.Second) + defer cancel() + out, err := restoreCmd.Run(checkCtx, "systemctl", "is-active", strings.TrimSpace(handle.unitName)+".service") + if err != nil { + return false + } + state := strings.TrimSpace(string(out)) + return state == "active" || state == "activating" +} + func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath string, timeout time.Duration, workDir string) (handle *networkRollbackHandle, err error) { done := logging.DebugStart(logger, "arm network rollback", "backup=%s timeout=%s workDir=%s", strings.TrimSpace(backupPath), timeout, strings.TrimSpace(workDir)) defer func() { done(err) }() @@ -534,18 +566,19 @@ func disarmNetworkRollback(ctx context.Context, logger *logging.Logger, handle * return } logging.DebugStep(logger, "disarm network rollback", "Disarming rollback (marker=%s unit=%s)", strings.TrimSpace(handle.markerPath), strings.TrimSpace(handle.unitName)) - if handle.markerPath != "" { + // Remove marker first so that even if the timer triggers concurrently the rollback script exits early. + if strings.TrimSpace(handle.markerPath) != "" { if err := restoreFS.Remove(handle.markerPath); err != nil && !errors.Is(err, os.ErrNotExist) { logger.Debug("Failed to remove rollback marker %s: %v", handle.markerPath, err) } } - if handle.unitName != "" && commandAvailable("systemctl") { - // systemd-run --on-active creates both .timer and .service; stop both. 
- for _, unit := range []string{handle.unitName + ".timer", handle.unitName + ".service"} { - if output, err := restoreCmd.Run(ctx, "systemctl", "stop", unit); err != nil { - logger.Debug("Failed to stop rollback unit %s: %v (output: %s)", unit, err, strings.TrimSpace(string(output))) - } + if strings.TrimSpace(handle.unitName) != "" && commandAvailable("systemctl") { + // Stop the timer only. If the service already started, let it finish. + timerUnit := strings.TrimSpace(handle.unitName) + ".timer" + if output, err := restoreCmd.Run(ctx, "systemctl", "stop", timerUnit); err != nil { + logger.Debug("Failed to stop rollback timer %s: %v (output: %s)", timerUnit, err, strings.TrimSpace(string(output))) } + _, _ = restoreCmd.Run(ctx, "systemctl", "reset-failed", strings.TrimSpace(handle.unitName)+".service", timerUnit) } } @@ -914,16 +947,49 @@ func buildRollbackScript(markerPath, backupPath, logPath string, restartNetworki } if restartNetworking { - lines = append(lines, - `echo "Restart networking after rollback" >> "$LOG"`, - `if command -v ifreload >/dev/null 2>&1; then ifreload -a >> "$LOG" 2>&1 || true;`, - `elif command -v systemctl >/dev/null 2>&1; then systemctl restart networking >> "$LOG" 2>&1 || true;`, - `elif command -v ifup >/dev/null 2>&1; then ifup -a >> "$LOG" 2>&1 || true;`, + lines = append(lines, + `echo "Restart networking after rollback" >> "$LOG"`, + `echo "Live state before reload:" >> "$LOG"`, + `ip -br addr >> "$LOG" 2>&1 || true`, + `ip route show >> "$LOG" 2>&1 || true`, + `RELOAD_OK=0`, + `if command -v ifreload >/dev/null 2>&1; then`, + ` echo "Executing: ifreload -a" >> "$LOG"`, + ` if ifreload -a >> "$LOG" 2>&1; then`, + ` RELOAD_OK=1`, + ` echo "ifreload: OK" >> "$LOG"`, + ` else`, + ` RC=$?`, + ` echo "ERROR: ifreload -a failed (exit=$RC)" >> "$LOG"`, + ` fi`, `fi`, - ) - } else { - lines = append(lines, `echo "Restart networking after rollback: skipped (manual)" >> "$LOG"`) - } + `if [ "$RELOAD_OK" -eq 0 ] && command -v systemctl >/dev/null 2>&1; then`, + ` echo "Executing fallback: systemctl restart networking" >> "$LOG"`, + ` if systemctl restart networking >> "$LOG" 2>&1; then`, + ` RELOAD_OK=1`, + ` echo "systemctl restart networking: OK" >> "$LOG"`, + ` else`, + ` RC=$?`, + ` echo "ERROR: systemctl restart networking failed (exit=$RC)" >> "$LOG"`, + ` fi`, + `fi`, + `if [ "$RELOAD_OK" -eq 0 ] && command -v ifup >/dev/null 2>&1; then`, + ` echo "Executing fallback: ifup -a" >> "$LOG"`, + ` if ifup -a >> "$LOG" 2>&1; then`, + ` RELOAD_OK=1`, + ` echo "ifup -a: OK" >> "$LOG"`, + ` else`, + ` RC=$?`, + ` echo "ERROR: ifup -a failed (exit=$RC)" >> "$LOG"`, + ` fi`, + `fi`, + `echo "Live state after reload:" >> "$LOG"`, + `ip -br addr >> "$LOG" 2>&1 || true`, + `ip route show >> "$LOG" 2>&1 || true`, + ) + } else { + lines = append(lines, `echo "Restart networking after rollback: skipped (manual)" >> "$LOG"`) + } lines = append(lines, `rm -f "$MARKER"`, From f3979793a6e78b1927b5780aca5f49a586146ef9 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 22 Jan 2026 01:05:15 +0100 Subject: [PATCH 04/10] Add prompt countdowns and improve network restore safety Introduces visible 90-second countdowns for critical prompts (network apply and smart /etc/fstab merge) to prevent hangs in automated/remote scenarios, defaulting to safe actions if unanswered. Refactors network rollback logic for better diagnostics and logging, adds new helper functions for input abort detection, and improves error handling throughout the restore workflow to support user-initiated aborts. 
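The countdown prompt implementation itself is not reproduced in full in this patch; a minimal sketch of the channel-plus-timeout pattern such a prompt typically uses, with a hypothetical askWithCountdown helper (not ProxSave's actual promptYesNoWithCountdown):

```go
package main

import (
	"bufio"
	"context"
	"fmt"
	"os"
	"strings"
	"time"
)

// askWithCountdown is an illustrative stand-in for the new prompts: it
// returns the user's answer, or the safe default when the timeout expires
// or the context is cancelled. The reader goroutine may outlive the call;
// tolerable in a sketch, handled more carefully in real code.
func askWithCountdown(ctx context.Context, r *bufio.Reader, question string, timeout time.Duration, def bool) bool {
	answers := make(chan bool, 1)
	go func() {
		line, err := r.ReadString('\n')
		if err != nil {
			return // input lost (EOF/closed stdin): let the default win
		}
		a := strings.ToLower(strings.TrimSpace(line))
		answers <- a == "y" || a == "yes"
	}()

	fmt.Printf("%s [default %v in %ds]: ", question, def, int(timeout.Seconds()))
	select {
	case a := <-answers:
		return a
	case <-time.After(timeout):
		fmt.Println("\nNo answer before the countdown expired; using the safe default.")
		return def
	case <-ctx.Done():
		return def
	}
}

func main() {
	ok := askWithCountdown(context.Background(), bufio.NewReader(os.Stdin),
		"Apply network configuration now?", 90*time.Second, false)
	fmt.Println("apply:", ok)
}
```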
Updates documentation to reflect new prompt timeout behavior and smart merge safety features. --- docs/RESTORE_GUIDE.md | 34 ++- internal/input/input.go | 9 + internal/input/input_test.go | 16 +- internal/orchestrator/.backup.lock | 4 +- internal/orchestrator/network_apply.go | 239 ++++++++++++----- internal/orchestrator/network_diagnostics.go | 243 ++++++++++++++++++ internal/orchestrator/prompts_cli.go | 88 +++++++ internal/orchestrator/restore.go | 42 ++- internal/orchestrator/restore_filesystem.go | 24 +- internal/orchestrator/restore_tui.go | 134 +++++++++- .../restore_workflow_abort_test.go | 143 +++++++++++ .../restore_workflow_warnings_test.go | 174 +++++++++++++ 12 files changed, 1047 insertions(+), 103 deletions(-) create mode 100644 internal/orchestrator/restore_workflow_abort_test.go create mode 100644 internal/orchestrator/restore_workflow_warnings_test.go diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index 92f057c..51bef58 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -1642,6 +1642,14 @@ Type "RESTORE" (exact case) to proceed, or "cancel"/"0" to abort: _ - Case-sensitive - Prevents accidental restoration +**Prompt timeouts (auto-skip)**: +- Some interactive prompts include a visible countdown (currently **90 seconds**) to avoid getting “stuck” waiting for input in remote/automated scenarios. +- If the user does not answer before the countdown reaches 0, ProxSave proceeds with a **safe default** (no destructive action) and logs the decision. + +Current auto-skip prompts: +- **Smart `/etc/fstab` merge**: defaults to **Skip** (no changes). +- **Live network apply** (“Apply restored network configuration now…”): defaults to **No** (stays staged/on-disk only; no live reload). + ### 3. Compatibility Validation **System Type Detection**: @@ -1662,6 +1670,9 @@ Type "yes" to continue anyway or "no" to abort: _ If the **network** category is restored, ProxSave can optionally apply the new network configuration immediately using a **transactional rollback timer**. +**Apply prompt auto-skip**: +- The “apply now” prompt includes a **90-second** countdown; if you do not answer in time, ProxSave defaults to **No** and skips the live reload. + **Important (console recommended)**: - Run the live network apply/commit step from the **local console** (physical console, IPMI/iDRAC/iLO, Proxmox console, or hypervisor console), not from SSH. - If the restored network config changes the management IP or routes, your SSH session will drop and you may be unable to type `COMMIT`. @@ -1704,7 +1715,20 @@ This protects SSH/GUI access during network changes. **Result reporting**: - If you do not type `COMMIT`, ProxSave completes the restore with warnings and reports that the original network settings were restored (including the current IP, when detectable), plus the rollback log path. -### 4. Hard Guards +### 5. Smart `/etc/fstab` Merge (Optional) + +If the restore includes filesystem configuration (notably `/etc/fstab`), ProxSave can run a **smart merge** instead of blindly overwriting your current `fstab`. + +**What it does**: +- Compares the current `/etc/fstab` with the backup copy. +- Keeps existing critical entries (for example, root and swap) when they already match the running system. +- Detects **safe mount candidates** from the backup (for example, additional NFS mounts) and offers to add them. + +**Safety behavior**: +- The user is prompted before any change is written. 
+- The prompt includes a **90-second** countdown; if you do not answer in time, ProxSave defaults to **Skip** (no changes). + +### 6. Hard Guards **Path Traversal Prevention**: - All extracted paths validated @@ -1721,7 +1745,7 @@ if cleanDestRoot == "/" && strings.HasPrefix(target, "/etc/pve") { ``` - Absolute prevention of `/etc/pve` corruption -### 5. Service Management Fail-Fast +### 7. Service Management Fail-Fast **Service Stop**: If ANY service fails to stop → ABORT entire restore @@ -1730,7 +1754,7 @@ if cleanDestRoot == "/" && strings.HasPrefix(target, "/etc/pve") { - Better to fail safely than corrupt database - User can investigate and retry -### 6. Comprehensive Logging +### 8. Comprehensive Logging **Detailed Log**: `/tmp/proxsave/restore_YYYYMMDD_HHMMSS.log` @@ -1768,7 +1792,7 @@ grep "storage.cfg" /tmp/proxsave/restore_20251120_143052.log grep "FAILED" /tmp/proxsave/restore_20251120_143052.log ``` -### 7. Checksum Verification +### 9. Checksum Verification **SHA256 Verification**: - Backup checksum verified after decryption @@ -1783,7 +1807,7 @@ Actual: a1b2c3... ✓ Checksum verified successfully. ``` -### 8. Deferred Service Restart +### 10. Deferred Service Restart **Go defer pattern** ensures services restart even if restore fails: diff --git a/internal/input/input.go b/internal/input/input.go index 7dafd2d..500e43e 100644 --- a/internal/input/input.go +++ b/internal/input/input.go @@ -15,6 +15,15 @@ import ( // Callers should translate this into the appropriate workflow-level abort error. var ErrInputAborted = errors.New("input aborted") +// IsAborted reports whether an operation was aborted by the user (typically via Ctrl+C), +// by checking for ErrInputAborted and context cancellation. +func IsAborted(err error) bool { + if err == nil { + return false + } + return errors.Is(err, ErrInputAborted) || errors.Is(err, context.Canceled) +} + // MapInputError normalizes common stdin errors (EOF/closed fd) into ErrInputAborted. 
func MapInputError(err error) error { if err == nil { diff --git a/internal/input/input_test.go b/internal/input/input_test.go index 9369375..cad4217 100644 --- a/internal/input/input_test.go +++ b/internal/input/input_test.go @@ -39,6 +39,21 @@ func TestMapInputError(t *testing.T) { } } +func TestIsAborted(t *testing.T) { + if IsAborted(nil) { + t.Fatalf("expected false for nil") + } + if !IsAborted(ErrInputAborted) { + t.Fatalf("expected true for ErrInputAborted") + } + if !IsAborted(context.Canceled) { + t.Fatalf("expected true for context.Canceled") + } + if IsAborted(errors.New("other")) { + t.Fatalf("expected false for non-abort errors") + } +} + func TestReadLineWithContext_ReturnsLine(t *testing.T) { reader := bufio.NewReader(strings.NewReader("hello\n")) got, err := ReadLineWithContext(context.Background(), reader) @@ -145,4 +160,3 @@ func TestReadPasswordWithContext_CancelledReturnsAborted(t *testing.T) { t.Fatalf("err=%v; want %v", err, ErrInputAborted) } } - diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index f3217c3..09a6350 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=1961097 +pid=2067929 host=pve -time=2026-01-21T07:21:16+01:00 +time=2026-01-21T10:24:39+01:00 diff --git a/internal/orchestrator/network_apply.go b/internal/orchestrator/network_apply.go index 5b197dd..f0737db 100644 --- a/internal/orchestrator/network_apply.go +++ b/internal/orchestrator/network_apply.go @@ -20,8 +20,8 @@ const defaultNetworkRollbackTimeout = 180 * time.Second var ErrNetworkApplyNotCommitted = errors.New("network configuration not committed") type NetworkApplyNotCommittedError struct { - RollbackLog string - RestoredIP string + RollbackLog string + RestoredIP string RollbackArmed bool } @@ -116,11 +116,17 @@ func maybeApplyNetworkConfigCLI(ctx context.Context, reader *bufio.Reader, logge } logging.DebugStep(logger, "network safe apply (cli)", "Prompt: apply network now with rollback timer") - applyNowPrompt := fmt.Sprintf( - "Apply restored network configuration now with automatic rollback (%ds)? 
(y/N): ", - int(defaultNetworkRollbackTimeout.Seconds()), - ) - applyNow, err := promptYesNo(ctx, reader, applyNowPrompt) + rollbackSeconds := int(defaultNetworkRollbackTimeout.Seconds()) + fmt.Println() + fmt.Println("Network restore: a restored network configuration is ready to apply.") + if strings.TrimSpace(stageRoot) != "" { + fmt.Printf("Source: %s (will be copied to /etc and applied)\n", strings.TrimSpace(stageRoot)) + } + fmt.Println("This will reload networking immediately (no reboot).") + fmt.Println("WARNING: This may change the active IP and disconnect SSH/Web sessions.") + fmt.Printf("After applying, type COMMIT within %ds or ProxSave will roll back automatically.\n", rollbackSeconds) + fmt.Println("Recommendation: run this step from the local console/IPMI, not over SSH.") + applyNow, err := promptYesNoWithCountdown(ctx, reader, logger, "Apply network configuration now?", 90*time.Second, false) if err != nil { return err } @@ -419,15 +425,14 @@ func applyNetworkWithRollbackCLI(ctx context.Context, reader *bufio.Reader, logg logging.DebugStep(logger, "network safe apply (cli)", "Wait for COMMIT (rollback in %ds)", int(remaining.Seconds())) committed, err := promptNetworkCommitWithCountdown(ctx, reader, logger, remaining) if err != nil { - logger.Warning("Commit prompt error: %v", err) - logger.Info("Commit input lost; rollback remains ARMED and will proceed automatically.") - return buildNetworkApplyNotCommittedError(ctx, iface, handle) + logger.Warning("Commit input lost (%v); rollback remains ARMED and will proceed automatically.", err) + return buildNetworkApplyNotCommittedError(ctx, logger, iface, handle) } logging.DebugStep(logger, "network safe apply (cli)", "User commit result: committed=%v", committed) if committed { - if rollbackAlreadyRunning(ctx, handle) { + if rollbackAlreadyRunning(ctx, logger, handle) { logger.Warning("Commit received too late: rollback already running. Network configuration NOT committed.") - return buildNetworkApplyNotCommittedError(ctx, iface, handle) + return buildNetworkApplyNotCommittedError(ctx, logger, iface, handle) } disarmNetworkRollback(ctx, logger, handle) logger.Info("Network configuration committed successfully.") @@ -436,24 +441,35 @@ func applyNetworkWithRollbackCLI(ctx context.Context, reader *bufio.Reader, logg // Not committed: keep rollback ARMED. Do not disarm. // The rollback script will run via systemd-run/nohup when the timer expires. 
- return buildNetworkApplyNotCommittedError(ctx, iface, handle) + return buildNetworkApplyNotCommittedError(ctx, logger, iface, handle) } -func buildNetworkApplyNotCommittedError(ctx context.Context, iface string, handle *networkRollbackHandle) *NetworkApplyNotCommittedError { +func buildNetworkApplyNotCommittedError(ctx context.Context, logger *logging.Logger, iface string, handle *networkRollbackHandle) *NetworkApplyNotCommittedError { + logging.DebugStep(logger, "build not-committed error", "Start: iface=%s handle=%v", iface, handle != nil) + restoredIP := "unknown" if strings.TrimSpace(iface) != "" { + logging.DebugStep(logger, "build not-committed error", "Querying current IP for iface=%s", iface) if ep, err := currentNetworkEndpoint(ctx, iface, 1*time.Second); err == nil && len(ep.Addresses) > 0 { restoredIP = strings.Join(ep.Addresses, ", ") + logging.DebugStep(logger, "build not-committed error", "Current IP: %s", restoredIP) + } else if err != nil { + logging.DebugStep(logger, "build not-committed error", "Failed to get IP: %v", err) } } rollbackArmed := true + logging.DebugStep(logger, "build not-committed error", "Checking rollback marker status") if handle == nil { rollbackArmed = false + logging.DebugStep(logger, "build not-committed error", "No handle: rollbackArmed=false") } else if strings.TrimSpace(handle.markerPath) != "" { if _, statErr := restoreFS.Stat(handle.markerPath); statErr != nil { // Marker missing => rollback likely already executed (or was manually removed). rollbackArmed = false + logging.DebugStep(logger, "build not-committed error", "Marker missing (%s): rollbackArmed=false", handle.markerPath) + } else { + logging.DebugStep(logger, "build not-committed error", "Marker exists (%s): rollbackArmed=true", handle.markerPath) } } @@ -461,6 +477,8 @@ func buildNetworkApplyNotCommittedError(ctx context.Context, iface string, handl if handle != nil { rollbackLog = strings.TrimSpace(handle.logPath) } + + logging.DebugStep(logger, "build not-committed error", "Result: ip=%s armed=%v log=%s", restoredIP, rollbackArmed, rollbackLog) return &NetworkApplyNotCommittedError{ RollbackLog: rollbackLog, RestoredIP: strings.TrimSpace(restoredIP), @@ -468,18 +486,31 @@ func buildNetworkApplyNotCommittedError(ctx context.Context, iface string, handl } } -func rollbackAlreadyRunning(ctx context.Context, handle *networkRollbackHandle) bool { - if handle == nil || strings.TrimSpace(handle.unitName) == "" || !commandAvailable("systemctl") { +func rollbackAlreadyRunning(ctx context.Context, logger *logging.Logger, handle *networkRollbackHandle) bool { + if handle == nil || strings.TrimSpace(handle.unitName) == "" { + logging.DebugStep(logger, "rollback already running", "Skip check: handle=%v unitName=%q", handle != nil, "") + return false + } + if !commandAvailable("systemctl") { + logging.DebugStep(logger, "rollback already running", "Skip check: systemctl not available") return false } + + serviceUnit := strings.TrimSpace(handle.unitName) + ".service" + logging.DebugStep(logger, "rollback already running", "Checking systemctl is-active %s", serviceUnit) + checkCtx, cancel := context.WithTimeout(ctx, 2*time.Second) defer cancel() - out, err := restoreCmd.Run(checkCtx, "systemctl", "is-active", strings.TrimSpace(handle.unitName)+".service") + out, err := restoreCmd.Run(checkCtx, "systemctl", "is-active", serviceUnit) if err != nil { + logging.DebugStep(logger, "rollback already running", "systemctl is-active failed: %v (assuming not running)", err) return false } + state := 
strings.TrimSpace(string(out)) - return state == "active" || state == "activating" + running := state == "active" || state == "activating" + logging.DebugStep(logger, "rollback already running", "Service state=%s running=%v", state, running) + return running } func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath string, timeout time.Duration, workDir string) (handle *networkRollbackHandle, err error) { @@ -492,6 +523,7 @@ func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath if timeout <= 0 { return nil, fmt.Errorf("invalid rollback timeout") } + logging.DebugStep(logger, "arm network rollback", "Parameters validated: backup=%s timeout=%s", backupPath, timeout) logging.DebugStep(logger, "arm network rollback", "Prepare rollback work directory") baseDir := strings.TrimSpace(workDir) @@ -513,17 +545,20 @@ func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath armedAt: time.Now(), timeout: timeout, } + logging.DebugStep(logger, "arm network rollback", "Handle created: marker=%s script=%s log=%s", handle.markerPath, handle.scriptPath, handle.logPath) logging.DebugStep(logger, "arm network rollback", "Write rollback marker: %s", handle.markerPath) if err := restoreFS.WriteFile(handle.markerPath, []byte("pending\n"), 0o640); err != nil { return nil, fmt.Errorf("write rollback marker: %w", err) } + logging.DebugStep(logger, "arm network rollback", "Marker written successfully") logging.DebugStep(logger, "arm network rollback", "Write rollback script: %s", handle.scriptPath) script := buildRollbackScript(handle.markerPath, backupPath, handle.logPath, true) if err := restoreFS.WriteFile(handle.scriptPath, []byte(script), 0o640); err != nil { return nil, fmt.Errorf("write rollback script: %w", err) } + logging.DebugStep(logger, "arm network rollback", "Script written successfully (%d bytes)", len(script)) timeoutSeconds := int(timeout.Seconds()) if timeoutSeconds < 1 { @@ -543,8 +578,11 @@ func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath logger.Warning("systemd-run failed, falling back to background timer: %v", err) logger.Debug("systemd-run output: %s", strings.TrimSpace(string(output))) handle.unitName = "" - } else if len(output) > 0 { - logger.Debug("systemd-run output: %s", strings.TrimSpace(string(output))) + } else { + logging.DebugStep(logger, "arm network rollback", "Timer armed via systemd-run: unit=%s", handle.unitName) + if len(output) > 0 { + logger.Debug("systemd-run output: %s", strings.TrimSpace(string(output))) + } } } @@ -555,6 +593,7 @@ func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath logger.Debug("Background rollback output: %s", strings.TrimSpace(string(output))) return nil, fmt.Errorf("failed to arm rollback timer: %w", err) } + logging.DebugStep(logger, "arm network rollback", "Timer armed via nohup (fallback)") } logger.Info("Rollback timer armed (%ds). 
Work dir: %s (log: %s)", timeoutSeconds, baseDir, handle.logPath) @@ -563,23 +602,37 @@ func armNetworkRollback(ctx context.Context, logger *logging.Logger, backupPath func disarmNetworkRollback(ctx context.Context, logger *logging.Logger, handle *networkRollbackHandle) { if handle == nil { + logging.DebugStep(logger, "disarm network rollback", "Skip: handle is nil") return } - logging.DebugStep(logger, "disarm network rollback", "Disarming rollback (marker=%s unit=%s)", strings.TrimSpace(handle.markerPath), strings.TrimSpace(handle.unitName)) + + logging.DebugStep(logger, "disarm network rollback", "Start: marker=%s unit=%s", strings.TrimSpace(handle.markerPath), strings.TrimSpace(handle.unitName)) + // Remove marker first so that even if the timer triggers concurrently the rollback script exits early. if strings.TrimSpace(handle.markerPath) != "" { + logging.DebugStep(logger, "disarm network rollback", "Removing marker file: %s", handle.markerPath) if err := restoreFS.Remove(handle.markerPath); err != nil && !errors.Is(err, os.ErrNotExist) { - logger.Debug("Failed to remove rollback marker %s: %v", handle.markerPath, err) + logger.Warning("Failed to remove rollback marker %s: %v", handle.markerPath, err) + } else { + logging.DebugStep(logger, "disarm network rollback", "Marker removed successfully") } } + if strings.TrimSpace(handle.unitName) != "" && commandAvailable("systemctl") { // Stop the timer only. If the service already started, let it finish. timerUnit := strings.TrimSpace(handle.unitName) + ".timer" + logging.DebugStep(logger, "disarm network rollback", "Stopping timer: %s", timerUnit) if output, err := restoreCmd.Run(ctx, "systemctl", "stop", timerUnit); err != nil { - logger.Debug("Failed to stop rollback timer %s: %v (output: %s)", timerUnit, err, strings.TrimSpace(string(output))) + logger.Warning("Failed to stop rollback timer %s: %v (output: %s)", timerUnit, err, strings.TrimSpace(string(output))) + } else { + logging.DebugStep(logger, "disarm network rollback", "Timer stopped successfully") } + + logging.DebugStep(logger, "disarm network rollback", "Resetting failed units") _, _ = restoreCmd.Run(ctx, "systemctl", "reset-failed", strings.TrimSpace(handle.unitName)+".service", timerUnit) } + + logging.DebugStep(logger, "disarm network rollback", "Disarm complete") } func maybeRepairNICNamesCLI(ctx context.Context, reader *bufio.Reader, logger *logging.Logger, archivePath string) *nicRepairResult { @@ -769,18 +822,24 @@ func defaultNetworkPortChecks(systemType SystemType) []tcpPortCheck { } func promptNetworkCommitWithCountdown(ctx context.Context, reader *bufio.Reader, logger *logging.Logger, remaining time.Duration) (bool, error) { + logging.DebugStep(logger, "prompt commit", "Start: remaining=%s", remaining) + if remaining <= 0 { + logging.DebugStep(logger, "prompt commit", "No time remaining, returning timeout") return false, context.DeadlineExceeded } - fmt.Printf("Type COMMIT within %d seconds to keep the new network configuration.\n", int(remaining.Seconds())) deadline := time.Now().Add(remaining) + logging.DebugStep(logger, "prompt commit", "Deadline set: %s", deadline.Format(time.RFC3339)) + + fmt.Printf("Type COMMIT within %d seconds to keep the new network configuration.\n", int(remaining.Seconds())) ctxTimeout, cancel := context.WithDeadline(ctx, deadline) defer cancel() inputCh := make(chan string, 1) errCh := make(chan error, 1) + logging.DebugStep(logger, "prompt commit", "Starting input reader goroutine") go func() { line, err := 
input.ReadLineWithContext(ctxTimeout, reader) if err != nil { @@ -793,6 +852,8 @@ func promptNetworkCommitWithCountdown(ctx context.Context, reader *bufio.Reader, ticker := time.NewTicker(1 * time.Second) defer ticker.Stop() + logging.DebugStep(logger, "prompt commit", "Waiting for user input...") + for { select { case <-ticker.C: @@ -803,20 +864,27 @@ func promptNetworkCommitWithCountdown(ctx context.Context, reader *bufio.Reader, fmt.Fprintf(os.Stderr, "\rRollback in %ds... Type COMMIT to keep: ", int(left.Seconds())) if left <= 0 { fmt.Fprintln(os.Stderr) + logging.DebugStep(logger, "prompt commit", "Timeout expired, returning DeadlineExceeded") return false, context.DeadlineExceeded } case line := <-inputCh: fmt.Fprintln(os.Stderr) - if strings.EqualFold(strings.TrimSpace(line), "commit") { + trimmedLine := strings.TrimSpace(line) + logging.DebugStep(logger, "prompt commit", "User input received: %q", trimmedLine) + if strings.EqualFold(trimmedLine, "commit") { + logging.DebugStep(logger, "prompt commit", "Result: COMMITTED") return true, nil } + logging.DebugStep(logger, "prompt commit", "Result: NOT COMMITTED (input was not 'commit')") return false, nil case err := <-errCh: fmt.Fprintln(os.Stderr) + logging.DebugStep(logger, "prompt commit", "Input error received: %v", err) if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { + logging.DebugStep(logger, "prompt commit", "Result: context deadline/canceled") return false, err } - logger.Debug("Commit input error: %v", err) + logging.DebugStep(logger, "prompt commit", "Result: NOT COMMITTED (input error)") return false, err } } @@ -881,41 +949,70 @@ func buildRollbackScript(markerPath, backupPath, logPath string, restartNetworki fmt.Sprintf("LOG=%s", shellQuote(logPath)), fmt.Sprintf("MARKER=%s", shellQuote(markerPath)), fmt.Sprintf("BACKUP=%s", shellQuote(backupPath)), - `if [ ! -f "$MARKER" ]; then exit 0; fi`, - `echo "Rollback started at $(date -Is)" >> "$LOG"`, - `echo "Rollback backup: $BACKUP" >> "$LOG"`, - `echo "Extract phase: restore files from rollback archive" >> "$LOG"`, + // Header + `echo "[INFO] ========================================" >> "$LOG"`, + `echo "[INFO] NETWORK ROLLBACK SCRIPT STARTED" >> "$LOG"`, + `echo "[INFO] Timestamp: $(date -Is)" >> "$LOG"`, + `echo "[INFO] Marker: $MARKER" >> "$LOG"`, + `echo "[INFO] Backup: $BACKUP" >> "$LOG"`, + `echo "[INFO] ========================================" >> "$LOG"`, + // Check marker + `echo "[DEBUG] Checking marker file..." >> "$LOG"`, + `if [ ! 
-f "$MARKER" ]; then`, + ` echo "[INFO] Marker not found - rollback cancelled (already disarmed)" >> "$LOG"`, + ` echo "[INFO] ========================================" >> "$LOG"`, + ` echo "[INFO] ROLLBACK SCRIPT EXITED (no-op)" >> "$LOG"`, + ` echo "[INFO] ========================================" >> "$LOG"`, + ` exit 0`, + `fi`, + `echo "[DEBUG] Marker exists, proceeding with rollback" >> "$LOG"`, + // Extract phase + `echo "[INFO] --- EXTRACT PHASE ---" >> "$LOG"`, + `echo "[DEBUG] Executing: tar -xzf $BACKUP -C /" >> "$LOG"`, `TAR_OK=0`, - `if tar -xzf "$BACKUP" -C / >> "$LOG" 2>&1; then TAR_OK=1; echo "Extract phase: OK" >> "$LOG"; else echo "WARN: failed to extract rollback archive; skipping prune phase" >> "$LOG"; fi`, + `if tar -xzf "$BACKUP" -C / >> "$LOG" 2>&1; then`, + ` TAR_OK=1`, + ` echo "[OK] Extract phase completed successfully" >> "$LOG"`, + `else`, + ` RC=$?`, + ` echo "[ERROR] Extract phase failed (exit=$RC) - skipping prune phase" >> "$LOG"`, + `fi`, + // Prune phase `if [ "$TAR_OK" -eq 1 ] && [ -d /etc/network ]; then`, - ` echo "Prune phase: removing files created after backup (network-only)" >> "$LOG"`, - ` echo "Prune scope: /etc/network (+ /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg, /etc/dnsmasq.d/lxc-vmbr1.conf)" >> "$LOG"`, + ` echo "[INFO] --- PRUNE PHASE ---" >> "$LOG"`, + ` echo "[DEBUG] Scope: /etc/network (+ /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg, /etc/dnsmasq.d/lxc-vmbr1.conf)" >> "$LOG"`, ` (`, ` set +e`, + ` echo "[DEBUG] Creating temp files for prune operation..."`, ` MANIFEST_ALL=$(mktemp /tmp/proxsave/network_rollback_manifest_all_XXXXXX 2>/dev/null)`, ` MANIFEST=$(mktemp /tmp/proxsave/network_rollback_manifest_XXXXXX 2>/dev/null)`, ` CANDIDATES=$(mktemp /tmp/proxsave/network_rollback_candidates_XXXXXX 2>/dev/null)`, ` CLEANUP=$(mktemp /tmp/proxsave/network_rollback_cleanup_XXXXXX 2>/dev/null)`, ` if [ -z "$MANIFEST_ALL" ] || [ -z "$MANIFEST" ] || [ -z "$CANDIDATES" ] || [ -z "$CLEANUP" ]; then`, - ` echo "WARN: mktemp failed; skipping prune"`, + ` echo "[WARN] mktemp failed - skipping prune phase"`, ` exit 0`, ` fi`, - ` echo "Listing rollback archive contents..."`, + ` echo "[DEBUG] Temp files created: manifest_all=$MANIFEST_ALL manifest=$MANIFEST candidates=$CANDIDATES cleanup=$CLEANUP"`, + ` echo "[DEBUG] Listing rollback archive contents..."`, ` if ! tar -tzf "$BACKUP" > "$MANIFEST_ALL"; then`, - ` echo "WARN: failed to list rollback archive; skipping prune"`, + ` echo "[WARN] Failed to list rollback archive - skipping prune phase"`, ` rm -f "$MANIFEST_ALL" "$MANIFEST" "$CANDIDATES" "$CLEANUP"`, ` exit 0`, ` fi`, - ` echo "Normalizing manifest paths..."`, + ` MANIFEST_COUNT=$(wc -l < "$MANIFEST_ALL")`, + ` echo "[DEBUG] Archive contains $MANIFEST_COUNT entries"`, + ` echo "[DEBUG] Normalizing manifest paths..."`, ` sed 's#^\./##' "$MANIFEST_ALL" > "$MANIFEST"`, ` if ! 
grep -q '^etc/network/' "$MANIFEST"; then`, - ` echo "WARN: rollback archive does not include etc/network; skipping prune"`, + ` echo "[WARN] Rollback archive does not include etc/network - skipping prune phase"`, ` rm -f "$MANIFEST_ALL" "$MANIFEST" "$CANDIDATES" "$CLEANUP"`, ` exit 0`, ` fi`, - ` echo "Scanning current filesystem under /etc/network..."`, + ` echo "[DEBUG] Scanning current filesystem under /etc/network..."`, ` find /etc/network -mindepth 1 \( -type f -o -type l \) -print > "$CANDIDATES" 2>/dev/null || true`, - ` echo "Computing cleanup list (present on disk, absent in backup)..."`, + ` CANDIDATES_COUNT=$(wc -l < "$CANDIDATES")`, + ` echo "[DEBUG] Found $CANDIDATES_COUNT files/links on disk"`, + ` echo "[DEBUG] Computing cleanup list (present on disk, absent in backup)..."`, ` : > "$CLEANUP"`, ` while IFS= read -r path; do`, ` rel=${path#/}`, @@ -932,64 +1029,74 @@ func buildRollbackScript(markerPath, backupPath, logPath string, restartNetworki ` fi`, ` done`, ` if [ -s "$CLEANUP" ]; then`, - ` echo "Pruning extraneous network files (not present in backup):"`, + ` CLEANUP_COUNT=$(wc -l < "$CLEANUP")`, + ` echo "[DEBUG] Found $CLEANUP_COUNT extraneous files to prune:"`, ` cat "$CLEANUP"`, + ` echo "[DEBUG] Removing extraneous files..."`, + ` REMOVED=0`, ` while IFS= read -r rmPath; do`, - ` rm -f -- "$rmPath" || true`, + ` if rm -f -- "$rmPath"; then`, + ` REMOVED=$((REMOVED+1))`, + ` else`, + ` echo "[WARN] Failed to remove: $rmPath"`, + ` fi`, ` done < "$CLEANUP"`, + ` echo "[OK] Prune phase completed - removed $REMOVED files"`, ` else`, - ` echo "No extraneous network files to prune."`, + ` echo "[OK] Prune phase completed - no extraneous files to remove"`, ` fi`, - ` echo "Prune phase: done"`, ` rm -f "$MANIFEST_ALL" "$MANIFEST" "$CANDIDATES" "$CLEANUP"`, ` ) >> "$LOG" 2>&1 || true`, `fi`, } if restartNetworking { - lines = append(lines, - `echo "Restart networking after rollback" >> "$LOG"`, - `echo "Live state before reload:" >> "$LOG"`, - `ip -br addr >> "$LOG" 2>&1 || true`, - `ip route show >> "$LOG" 2>&1 || true`, + lines = append(lines, + `echo "[INFO] Restart networking after rollback" >> "$LOG"`, + `echo "[INFO] Live state before reload:" >> "$LOG"`, + `ip -br addr >> "$LOG" 2>&1 || true`, + `ip route show >> "$LOG" 2>&1 || true`, `RELOAD_OK=0`, `if command -v ifreload >/dev/null 2>&1; then`, - ` echo "Executing: ifreload -a" >> "$LOG"`, + ` echo "[INFO] Executing: ifreload -a" >> "$LOG"`, ` if ifreload -a >> "$LOG" 2>&1; then`, ` RELOAD_OK=1`, - ` echo "ifreload: OK" >> "$LOG"`, + ` echo "[OK] ifreload -a completed successfully" >> "$LOG"`, ` else`, ` RC=$?`, - ` echo "ERROR: ifreload -a failed (exit=$RC)" >> "$LOG"`, + ` echo "[ERROR] ifreload -a failed (exit=$RC)" >> "$LOG"`, ` fi`, `fi`, `if [ "$RELOAD_OK" -eq 0 ] && command -v systemctl >/dev/null 2>&1; then`, - ` echo "Executing fallback: systemctl restart networking" >> "$LOG"`, + ` echo "[INFO] Executing fallback: systemctl restart networking" >> "$LOG"`, ` if systemctl restart networking >> "$LOG" 2>&1; then`, ` RELOAD_OK=1`, - ` echo "systemctl restart networking: OK" >> "$LOG"`, + ` echo "[OK] systemctl restart networking completed successfully" >> "$LOG"`, ` else`, ` RC=$?`, - ` echo "ERROR: systemctl restart networking failed (exit=$RC)" >> "$LOG"`, + ` echo "[ERROR] systemctl restart networking failed (exit=$RC)" >> "$LOG"`, ` fi`, `fi`, `if [ "$RELOAD_OK" -eq 0 ] && command -v ifup >/dev/null 2>&1; then`, - ` echo "Executing fallback: ifup -a" >> "$LOG"`, + ` echo "[INFO] Executing fallback: ifup 
-a" >> "$LOG"`, ` if ifup -a >> "$LOG" 2>&1; then`, ` RELOAD_OK=1`, - ` echo "ifup -a: OK" >> "$LOG"`, + ` echo "[OK] ifup -a completed successfully" >> "$LOG"`, ` else`, ` RC=$?`, - ` echo "ERROR: ifup -a failed (exit=$RC)" >> "$LOG"`, + ` echo "[ERROR] ifup -a failed (exit=$RC)" >> "$LOG"`, ` fi`, - `fi`, - `echo "Live state after reload:" >> "$LOG"`, - `ip -br addr >> "$LOG" 2>&1 || true`, - `ip route show >> "$LOG" 2>&1 || true`, - ) - } else { - lines = append(lines, `echo "Restart networking after rollback: skipped (manual)" >> "$LOG"`) - } + `fi`, + `if [ "$RELOAD_OK" -eq 0 ]; then`, + ` echo "[WARN] All network reload methods failed - network may not be properly configured" >> "$LOG"`, + `fi`, + `echo "[INFO] Live state after reload:" >> "$LOG"`, + `ip -br addr >> "$LOG" 2>&1 || true`, + `ip route show >> "$LOG" 2>&1 || true`, + ) + } else { + lines = append(lines, `echo "[INFO] Restart networking after rollback: skipped (manual)" >> "$LOG"`) + } lines = append(lines, `rm -f "$MARKER"`, diff --git a/internal/orchestrator/network_diagnostics.go b/internal/orchestrator/network_diagnostics.go index 42d2a5e..1509f17 100644 --- a/internal/orchestrator/network_diagnostics.go +++ b/internal/orchestrator/network_diagnostics.go @@ -1,9 +1,14 @@ package orchestrator import ( + "bufio" "context" + "crypto/sha256" + "encoding/hex" "fmt" + "os" "path/filepath" + "sort" "strings" "sync/atomic" "time" @@ -13,6 +18,8 @@ import ( var networkDiagnosticsSequence uint64 +const snapshotLogMaxExtractedLines = 20 + func createNetworkDiagnosticsDir() (string, error) { baseDir := "/tmp/proxsave" if err := restoreFS.MkdirAll(baseDir, 0o755); err != nil { @@ -45,6 +52,7 @@ func writeNetworkSnapshot(ctx context.Context, logger *logging.Logger, diagnosti b.WriteString(fmt.Sprintf("GeneratedAt: %s\n", nowRestore().Format(time.RFC3339))) b.WriteString(fmt.Sprintf("Label: %s\n\n", label)) + b.WriteString("=== LIVE NETWORK STATE ===\n\n") commands := [][]string{ {"ip", "-br", "link"}, {"ip", "-br", "addr"}, @@ -60,6 +68,9 @@ func writeNetworkSnapshot(ctx context.Context, logger *logging.Logger, diagnosti ctxTimeout, cancel := context.WithTimeout(ctx, timeout) out, err := restoreCmd.Run(ctxTimeout, cmd[0], cmd[1:]...) 
cancel() + if logger != nil { + debugLogCommandResult(logger, "network snapshot", label, cmd, out, err) + } if len(out) > 0 { b.Write(out) if out[len(out)-1] != '\n' { @@ -75,6 +86,23 @@ func writeNetworkSnapshot(ctx context.Context, logger *logging.Logger, diagnosti b.WriteString("\n") } + b.WriteString("=== ON-DISK NETWORK CONFIG ===\n\n") + appendFileSnapshot(logger, label, &b, "/etc/network/interfaces", 64*1024) + appendDirSnapshot(logger, label, &b, "/etc/network/interfaces.d", 64*1024) + appendFileSnapshot(logger, label, &b, "/etc/hosts", 64*1024) + appendFileSnapshot(logger, label, &b, "/etc/hostname", 8*1024) + appendFileSnapshot(logger, label, &b, "/etc/resolv.conf", 16*1024) + + b.WriteString("=== NETWORK STACK SERVICES ===\n\n") + appendCommandSnapshot(ctx, logger, &b, timeout, []string{"systemctl", "is-active", "networking"}) + appendCommandSnapshot(ctx, logger, &b, timeout, []string{"systemctl", "is-active", "systemd-networkd"}) + appendCommandSnapshot(ctx, logger, &b, timeout, []string{"systemctl", "is-active", "NetworkManager"}) + b.WriteString("\n") + + b.WriteString("=== ifupdown (ifquery --running -a) ===\n\n") + appendCommandSnapshot(ctx, logger, &b, timeout, []string{"ifquery", "--running", "-a"}) + b.WriteString("\n") + if err := restoreFS.WriteFile(path, []byte(b.String()), 0o600); err != nil { return "", err } @@ -82,6 +110,221 @@ func writeNetworkSnapshot(ctx context.Context, logger *logging.Logger, diagnosti return path, nil } +func appendCommandSnapshot(ctx context.Context, logger *logging.Logger, b *strings.Builder, timeout time.Duration, cmd []string) { + if len(cmd) == 0 { + return + } + b.WriteString("$ " + strings.Join(cmd, " ") + "\n") + ctxTimeout, cancel := context.WithTimeout(ctx, timeout) + out, err := restoreCmd.Run(ctxTimeout, cmd[0], cmd[1:]...) 
+ cancel() + if logger != nil { + debugLogCommandResult(logger, "network snapshot", "command", cmd, out, err) + } + if len(out) > 0 { + b.Write(out) + if out[len(out)-1] != '\n' { + b.WriteString("\n") + } + } + if err != nil { + b.WriteString(fmt.Sprintf("ERROR: %v\n", err)) + if logger != nil { + logger.Debug("Network snapshot command failed: %s: %v", strings.Join(cmd, " "), err) + } + } + b.WriteString("\n") +} + +func appendFileSnapshot(logger *logging.Logger, label string, b *strings.Builder, path string, maxBytes int) { + b.WriteString("## File: " + path + "\n") + if logger != nil { + logging.DebugStep(logger, "network snapshot", "Read file (%s): %s", label, path) + } + info, err := restoreFS.Stat(path) + if err != nil { + b.WriteString(fmt.Sprintf("ERROR: %v\n\n", err)) + if logger != nil { + logging.DebugStep(logger, "network snapshot", "Stat failed (%s): %s: %v", label, path, err) + } + return + } + b.WriteString(fmt.Sprintf("Mode: %s\n", info.Mode().String())) + b.WriteString(fmt.Sprintf("Size: %d\n", info.Size())) + b.WriteString(fmt.Sprintf("ModTime: %s\n\n", info.ModTime().Format(time.RFC3339))) + data, err := restoreFS.ReadFile(path) + if err != nil { + b.WriteString(fmt.Sprintf("ERROR: %v\n\n", err)) + if logger != nil { + logging.DebugStep(logger, "network snapshot", "Read failed (%s): %s: %v", label, path, err) + } + return + } + if logger != nil { + debugLogOnDiskFile(logger, label, path, info, data) + } + if maxBytes > 0 && len(data) > maxBytes { + b.Write(data[:maxBytes]) + if maxBytes > 0 && (len(data) == 0 || data[maxBytes-1] != '\n') { + b.WriteString("\n") + } + b.WriteString(fmt.Sprintf("\n[truncated: %d of %d bytes]\n\n", maxBytes, len(data))) + return + } + b.Write(data) + if len(data) > 0 && data[len(data)-1] != '\n' { + b.WriteString("\n") + } + b.WriteString("\n") +} + +func appendDirSnapshot(logger *logging.Logger, label string, b *strings.Builder, dir string, maxBytesPerFile int) { + b.WriteString("## Dir: " + dir + "\n") + if logger != nil { + logging.DebugStep(logger, "network snapshot", "Read dir (%s): %s", label, dir) + } + entries, err := restoreFS.ReadDir(dir) + if err != nil { + b.WriteString(fmt.Sprintf("ERROR: %v\n\n", err)) + if logger != nil { + logging.DebugStep(logger, "network snapshot", "ReadDir failed (%s): %s: %v", label, dir, err) + } + return + } + if len(entries) == 0 { + b.WriteString("(empty)\n\n") + if logger != nil { + logging.DebugStep(logger, "network snapshot", "Dir empty (%s): %s", label, dir) + } + return + } + + type entryInfo struct { + name string + mode os.FileMode + } + var list []entryInfo + for _, e := range entries { + info, err := e.Info() + if err != nil { + continue + } + list = append(list, entryInfo{name: e.Name(), mode: info.Mode()}) + } + sort.Slice(list, func(i, j int) bool { return list[i].name < list[j].name }) + if logger != nil { + names := make([]string, 0, len(list)) + for _, e := range list { + names = append(names, e.name) + } + logging.DebugStep(logger, "network snapshot", "Dir entries (%s): %s: %s", label, dir, strings.Join(names, ", ")) + } + for _, e := range list { + b.WriteString(fmt.Sprintf("- %s (%s)\n", e.name, e.mode.String())) + } + b.WriteString("\n") + + for _, e := range list { + full := filepath.Join(dir, e.name) + if e.mode.IsRegular() || (e.mode&os.ModeSymlink != 0) { + appendFileSnapshot(logger, label, b, full, maxBytesPerFile) + } + } +} + +func debugLogCommandResult(logger *logging.Logger, operation, label string, cmd []string, out []byte, err error) { + if logger == nil || len(cmd) == 
0 { + return + } + cmdStr := strings.Join(cmd, " ") + if err != nil { + logging.DebugStep(logger, operation, "Command error (%s): %s: %v", label, cmdStr, err) + } else { + logging.DebugStep(logger, operation, "Command ok (%s): %s", label, cmdStr) + } + if len(out) == 0 { + return + } + s := strings.TrimSpace(string(out)) + if s == "" { + return + } + preview := s + if len(preview) > 800 { + preview = preview[:800] + "…" + } + logging.DebugStep(logger, operation, "Command output (%s): %s: %s", label, cmdStr, preview) +} + +func debugLogOnDiskFile(logger *logging.Logger, label, path string, info os.FileInfo, data []byte) { + if logger == nil || info == nil { + return + } + sum := sha256.Sum256(data) + sha := hex.EncodeToString(sum[:]) + logging.DebugStep(logger, "network snapshot", "File ok (%s): %s mode=%s size=%d mtime=%s sha256=%s", label, path, info.Mode().String(), info.Size(), info.ModTime().Format(time.RFC3339), sha) + + extracted := extractInterestingLines(path, data, snapshotLogMaxExtractedLines) + if len(extracted) == 0 { + return + } + for _, line := range extracted { + logging.DebugStep(logger, "network snapshot", "File excerpt (%s): %s: %s", label, path, line) + } +} + +func extractInterestingLines(path string, data []byte, limit int) []string { + if limit <= 0 { + return nil + } + var out []string + scanner := bufio.NewScanner(strings.NewReader(string(data))) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + if isInterestingNetworkLine(path, line) { + out = append(out, line) + if len(out) >= limit { + break + } + } + } + return out +} + +func isInterestingNetworkLine(path, line string) bool { + switch path { + case "/etc/network/interfaces": + return strings.HasPrefix(line, "auto ") || + strings.HasPrefix(line, "iface ") || + strings.HasPrefix(line, "allow-hotplug ") || + strings.HasPrefix(line, "address ") || + strings.HasPrefix(line, "gateway ") || + strings.HasPrefix(line, "bridge-") || + strings.HasPrefix(line, "source ") + case "/etc/resolv.conf": + return strings.HasPrefix(line, "nameserver ") || + strings.HasPrefix(line, "search ") || + strings.HasPrefix(line, "domain ") + case "/etc/hostname": + return true + case "/etc/hosts": + if strings.HasPrefix(line, "127.") || strings.HasPrefix(line, "::1") { + return false + } + return strings.HasPrefix(line, "10.") || + strings.HasPrefix(line, "172.") || + strings.HasPrefix(line, "192.") || + strings.Contains(line, ".local") || + strings.Contains(line, "pbs") || + strings.Contains(line, "pve") + default: + return true + } +} + func writeNetworkHealthReportFile(diagnosticsDir string, report networkHealthReport) (string, error) { return writeNetworkHealthReportFileNamed(diagnosticsDir, "health_after.txt", report) } diff --git a/internal/orchestrator/prompts_cli.go b/internal/orchestrator/prompts_cli.go index 7958157..4f4c03a 100644 --- a/internal/orchestrator/prompts_cli.go +++ b/internal/orchestrator/prompts_cli.go @@ -3,10 +3,14 @@ package orchestrator import ( "bufio" "context" + "errors" "fmt" + "os" "strings" + "time" "github.com/tis24dev/proxsave/internal/input" + "github.com/tis24dev/proxsave/internal/logging" ) func promptYesNo(ctx context.Context, reader *bufio.Reader, prompt string) (bool, error) { @@ -42,3 +46,87 @@ func promptYesNoWithDefault(ctx context.Context, reader *bufio.Reader, prompt st } } } + +// promptYesNoWithCountdown prompts the user for a yes/no answer while showing a visible countdown. 
+// If no input is received before the timeout expires, it proceeds safely with "No". +// Pressing Enter chooses the provided defaultYes. +func promptYesNoWithCountdown(ctx context.Context, reader *bufio.Reader, logger *logging.Logger, question string, timeout time.Duration, defaultYes bool) (bool, error) { + defStr := "[Y/n]" + if !defaultYes { + defStr = "[y/N]" + } + + question = strings.TrimSpace(question) + if question == "" { + question = "Proceed?" + } + if timeout <= 0 { + return promptYesNoWithDefault(ctx, reader, fmt.Sprintf("%s %s ", question, defStr), defaultYes) + } + + deadline := time.Now().Add(timeout) + logging.DebugStep(logger, "prompt yes/no", "Start: question=%q defaultYes=%v timeout=%s", question, defaultYes, timeout) + + ctxTimeout, cancel := context.WithDeadline(ctx, deadline) + defer cancel() + + inputCh := make(chan string, 1) + errCh := make(chan error, 1) + + go func() { + line, err := input.ReadLineWithContext(ctxTimeout, reader) + if err != nil { + errCh <- err + return + } + inputCh <- line + }() + + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + left := time.Until(deadline) + if left < 0 { + left = 0 + } + fmt.Fprintf(os.Stderr, "\rAuto-skip in %ds... %s %s ", int(left.Seconds()), question, defStr) + if left <= 0 { + fmt.Fprintln(os.Stderr) + logging.DebugStep(logger, "prompt yes/no", "Timeout expired: proceeding with No") + logger.Info("No response within %ds; proceeding with No.", int(timeout.Seconds())) + return false, nil + } + case line := <-inputCh: + fmt.Fprintln(os.Stderr) + trimmed := strings.ToLower(strings.TrimSpace(line)) + logging.DebugStep(logger, "prompt yes/no", "User input received: %q", trimmed) + switch trimmed { + case "": + return defaultYes, nil + case "y", "yes": + return true, nil + case "n", "no": + return false, nil + default: + logger.Info("Unrecognized input %q; proceeding with No.", strings.TrimSpace(line)) + return false, nil + } + case err := <-errCh: + fmt.Fprintln(os.Stderr) + if errors.Is(err, context.DeadlineExceeded) { + logging.DebugStep(logger, "prompt yes/no", "Input timed out: proceeding with No") + logger.Info("No response within %ds; proceeding with No.", int(timeout.Seconds())) + return false, nil + } + if errors.Is(err, context.Canceled) { + logging.DebugStep(logger, "prompt yes/no", "Input canceled: %v", err) + return false, err + } + logging.DebugStep(logger, "prompt yes/no", "Input error: %v", err) + return false, err + } + } +} diff --git a/internal/orchestrator/restore.go b/internal/orchestrator/restore.go index 03500c0..0e2a2a8 100644 --- a/internal/orchestrator/restore.go +++ b/internal/orchestrator/restore.go @@ -232,7 +232,9 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging } clusterServicesStopped = true defer func() { - if err := startPVEClusterServices(ctx, logger); err != nil { + restartCtx, cancel := context.WithTimeout(context.Background(), 2*serviceStartTimeout+2*serviceVerifyTimeout+10*time.Second) + defer cancel() + if err := startPVEClusterServices(restartCtx, logger); err != nil { logger.Warning("Failed to restart PVE services after restore: %v", err) } }() @@ -254,7 +256,9 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging } else { pbsServicesStopped = true defer func() { - if err := startPBSServices(ctx, logger); err != nil { + restartCtx, cancel := context.WithTimeout(context.Background(), 2*serviceStartTimeout+2*serviceVerifyTimeout+10*time.Second) + defer cancel() + 
if err := startPBSServices(restartCtx, logger); err != nil { logger.Warning("Failed to restart PBS services after restore: %v", err) } }() @@ -353,6 +357,9 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging } if exportLog, err := extractSelectiveArchive(ctx, prepared.ArchivePath, exportRoot, plan.ExportCategories, RestoreModeCustom, logger); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Export completed with errors: %v", err) } else { exportLogPath = exportLog @@ -364,6 +371,9 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging if exportRoot == "" { logger.Warning("Cluster SAFE mode selected but export directory not available; skipping automatic pvesh apply") } else if err := runSafeClusterApply(ctx, reader, exportRoot, logger); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Cluster SAFE apply completed with errors: %v", err) } } @@ -380,6 +390,9 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging } if stageLog, err := extractSelectiveArchive(ctx, prepared.ArchivePath, stageRoot, plan.StagedCategories, RestoreModeCustom, logger); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Staging completed with errors: %v", err) } else { stageLogPath = stageLog @@ -387,12 +400,18 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging logger.Info("") if err := maybeApplyPBSConfigsFromStage(ctx, logger, plan, stageRoot, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("PBS staged config apply: %v", err) } } stageRootForNetworkApply := stageRoot if installed, err := maybeInstallNetworkConfigFromStage(ctx, logger, plan, stageRoot, prepared.ArchivePath, networkRollbackBackup, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Network staged install: %v", err) } else if installed { stageRootForNetworkApply = "" @@ -428,12 +447,20 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging } else { fsCategory := []Category{*fsCat} if _, err := extractSelectiveArchive(ctx, prepared.ArchivePath, fsTempDir, fsCategory, RestoreModeCustom, logger); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Failed to extract filesystem config for merge: %v", err) } else { // Perform Smart Merge currentFstab := filepath.Join(destRoot, "etc", "fstab") backupFstab := filepath.Join(fsTempDir, "etc", "fstab") if err := SmartMergeFstab(ctx, logger, reader, currentFstab, backupFstab, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + logger.Info("Restore aborted by user during Smart Filesystem Configuration Merge.") + return err + } + restoreHadWarnings = true logger.Warning("Smart Fstab Merge failed: %v", err) } } @@ -445,6 +472,9 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging if plan.HasCategoryID("network") { logger.Info("") if err := maybeRepairResolvConfAfterRestore(ctx, logger, prepared.ArchivePath, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } restoreHadWarnings = true logger.Warning("DNS resolver repair: %v", err) } @@ -452,6 +482,10 @@ func 
RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging logger.Info("") if err := maybeApplyNetworkConfigCLI(ctx, reader, logger, plan, safetyBackup, networkRollbackBackup, stageRootForNetworkApply, prepared.ArchivePath, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + logger.Info("Restore aborted by user during network apply prompt.") + return err + } restoreHadWarnings = true if errors.Is(err, ErrNetworkApplyNotCommitted) { var notCommitted *NetworkApplyNotCommittedError @@ -1188,6 +1222,10 @@ func runFullRestore(ctx context.Context, reader *bufio.Reader, candidate *decryp currentFstab := filepath.Join(destRoot, "etc", "fstab") backupFstab := filepath.Join(fsTempDir, "etc", "fstab") if err := SmartMergeFstab(ctx, logger, reader, currentFstab, backupFstab, dryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + logger.Info("Restore aborted by user during Smart Filesystem Configuration Merge.") + return err + } logger.Warning("Smart Fstab Merge failed: %v", err) } } diff --git a/internal/orchestrator/restore_filesystem.go b/internal/orchestrator/restore_filesystem.go index 7de0557..020a8c6 100644 --- a/internal/orchestrator/restore_filesystem.go +++ b/internal/orchestrator/restore_filesystem.go @@ -8,8 +8,8 @@ import ( "os" "path/filepath" "strings" + "time" - "github.com/tis24dev/proxsave/internal/input" "github.com/tis24dev/proxsave/internal/logging" ) @@ -68,7 +68,7 @@ func SmartMergeFstab(ctx context.Context, logger *logging.Logger, reader *bufio. defaultYes := analysis.RootComparable && analysis.RootMatch && (!analysis.SwapComparable || analysis.SwapMatch) confirmMsg := "Do you want to add the missing mounts (NFS/CIFS and data mounts with verified UUID/LABEL)?" - confirmed, err := confirmLocal(ctx, reader, confirmMsg, defaultYes) + confirmed, err := promptYesNoWithCountdown(ctx, reader, logger, confirmMsg, 90*time.Second, defaultYes) if err != nil { return err } @@ -82,26 +82,6 @@ func SmartMergeFstab(ctx context.Context, logger *logging.Logger, reader *bufio. 
return applyFstabMerge(ctx, logger, currentRaw, currentFstabPath, analysis.ProposedMounts, dryRun) } -// confirmLocal prompts for yes/no -func confirmLocal(ctx context.Context, reader *bufio.Reader, prompt string, defaultYes bool) (bool, error) { - defStr := "[Y/n]" - if !defaultYes { - defStr = "[y/N]" - } - fmt.Printf("%s %s ", prompt, defStr) - - line, err := input.ReadLineWithContext(ctx, reader) - if err != nil { - return false, err - } - - trimmed := strings.TrimSpace(strings.ToLower(line)) - if trimmed == "" { - return defaultYes, nil - } - return trimmed == "y" || trimmed == "yes", nil -} - func parseFstab(path string) ([]FstabEntry, []string, error) { content, err := restoreFS.ReadFile(path) if err != nil { diff --git a/internal/orchestrator/restore_tui.go b/internal/orchestrator/restore_tui.go index cad3e57..e7cfd8b 100644 --- a/internal/orchestrator/restore_tui.go +++ b/internal/orchestrator/restore_tui.go @@ -15,6 +15,7 @@ import ( "github.com/rivo/tview" "github.com/tis24dev/proxsave/internal/config" + "github.com/tis24dev/proxsave/internal/input" "github.com/tis24dev/proxsave/internal/logging" "github.com/tis24dev/proxsave/internal/tui" "github.com/tis24dev/proxsave/internal/tui/components" @@ -243,7 +244,9 @@ func RunRestoreWorkflowTUI(ctx context.Context, cfg *config.Config, logger *logg } clusterServicesStopped = true defer func() { - if err := startPVEClusterServices(ctx, logger); err != nil { + restartCtx, cancel := context.WithTimeout(context.Background(), 2*serviceStartTimeout+2*serviceVerifyTimeout+10*time.Second) + defer cancel() + if err := startPVEClusterServices(restartCtx, logger); err != nil { logger.Warning("Failed to restart PVE services after restore: %v", err) } }() @@ -270,7 +273,9 @@ func RunRestoreWorkflowTUI(ctx context.Context, cfg *config.Config, logger *logg } else { pbsServicesStopped = true defer func() { - if err := startPBSServices(ctx, logger); err != nil { + restartCtx, cancel := context.WithTimeout(context.Background(), 2*serviceStartTimeout+2*serviceVerifyTimeout+10*time.Second) + defer cancel() + if err := startPBSServices(restartCtx, logger); err != nil { logger.Warning("Failed to restart PBS services after restore: %v", err) } }() @@ -353,6 +358,9 @@ func RunRestoreWorkflowTUI(ctx context.Context, cfg *config.Config, logger *logg } if exportLog, exErr := extractSelectiveArchive(ctx, prepared.ArchivePath, exportRoot, plan.ExportCategories, RestoreModeCustom, logger); exErr != nil { + if errors.Is(exErr, ErrRestoreAborted) || input.IsAborted(exErr) { + return exErr + } logger.Warning("Export completed with errors: %v", exErr) } else { exportLogPath = exportLog @@ -365,6 +373,9 @@ func RunRestoreWorkflowTUI(ctx context.Context, cfg *config.Config, logger *logg logger.Warning("Cluster SAFE mode selected but export directory not available; skipping automatic pvesh apply") } else if err := runSafeClusterApply(ctx, bufio.NewReader(os.Stdin), exportRoot, logger); err != nil { // Note: runSafeClusterApply currently uses console prompts; this step remains non-TUI. 
+ if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Cluster SAFE apply completed with errors: %v", err) } } @@ -381,6 +392,9 @@ func RunRestoreWorkflowTUI(ctx context.Context, cfg *config.Config, logger *logg } if stageLog, err := extractSelectiveArchive(ctx, prepared.ArchivePath, stageRoot, plan.StagedCategories, RestoreModeCustom, logger); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Staging completed with errors: %v", err) } else { stageLogPath = stageLog @@ -388,12 +402,18 @@ func RunRestoreWorkflowTUI(ctx context.Context, cfg *config.Config, logger *logg logger.Info("") if err := maybeApplyPBSConfigsFromStage(ctx, logger, plan, stageRoot, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("PBS staged config apply: %v", err) } } stageRootForNetworkApply := stageRoot if installed, err := maybeInstallNetworkConfigFromStage(ctx, logger, plan, stageRoot, prepared.ArchivePath, networkRollbackBackup, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Network staged install: %v", err) } else if installed { stageRootForNetworkApply = "" @@ -417,12 +437,18 @@ func RunRestoreWorkflowTUI(ctx context.Context, cfg *config.Config, logger *logg if plan.HasCategoryID("network") { logger.Info("") if err := maybeRepairResolvConfAfterRestore(ctx, logger, prepared.ArchivePath, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("DNS resolver repair: %v", err) } } logger.Info("") if err := maybeApplyNetworkConfigTUI(ctx, logger, plan, safetyBackup, networkRollbackBackup, stageRootForNetworkApply, prepared.ArchivePath, configPath, buildSig, cfg.DryRun); err != nil { + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } logger.Warning("Network apply step skipped or failed: %v", err) } @@ -1120,17 +1146,25 @@ func maybeApplyNetworkConfigTUI(ctx context.Context, logger *logging.Logger, pla } logging.DebugStep(logger, "network safe apply (tui)", "Prompt: apply network now with rollback timer") + sourceLine := "Source: /etc/network (will be applied)" + if strings.TrimSpace(stageRoot) != "" { + sourceLine = fmt.Sprintf("Source: %s (will be copied to /etc and applied)", strings.TrimSpace(stageRoot)) + } message := fmt.Sprintf( - "Apply restored network configuration now with an automatic rollback timer (%ds).\n\nIf you do not commit the changes, the previous network configuration will be restored automatically.\n\nProceed with live network apply?", + "Network restore: a restored network configuration is ready to apply.\n%s\n\nThis will reload networking immediately (no reboot).\n\nWARNING: This may change the active IP and disconnect SSH/Web sessions.\n\nAfter applying, type COMMIT within %ds or ProxSave will roll back automatically.\n\nRecommendation: run this step from the local console/IPMI, not over SSH.\n\nApply network configuration now?", + sourceLine, int(defaultNetworkRollbackTimeout.Seconds()), ) - applyNow, err := promptYesNoTUIFunc( + applyNow, err := promptYesNoTUIWithCountdown( + ctx, + logger, "Apply network configuration", configPath, buildSig, message, "Apply now", "Skip apply", + 90*time.Second, ) if err != nil { return err @@ -1915,7 +1949,7 @@ func runFullRestoreTUI(ctx context.Context, candidate *decryptCandidate, prepare 
msg.WriteString("\nHint: verify disks/UUIDs and options (nofail/_netdev) before adding them.\n") } - apply, perr := promptYesNoTUIFunc("Smart fstab merge", configPath, buildSig, msg.String(), "Apply", "Skip") + apply, perr := promptYesNoTUIWithCountdown(ctx, logger, "Smart fstab merge", configPath, buildSig, msg.String(), "Apply", "Skip", 90*time.Second) if perr != nil { return perr } @@ -1976,6 +2010,96 @@ func promptYesNoTUI(title, configPath, buildSig, message, yesLabel, noLabel stri return result, nil } +func promptYesNoTUIWithCountdown(ctx context.Context, logger *logging.Logger, title, configPath, buildSig, message, yesLabel, noLabel string, timeout time.Duration) (bool, error) { + app := newTUIApp() + var result bool + var cancelled bool + var timedOut bool + + infoText := tview.NewTextView(). + SetText(message). + SetWrap(true). + SetTextColor(tcell.ColorWhite). + SetDynamicColors(true) + + countdownText := tview.NewTextView(). + SetWrap(false). + SetTextColor(tcell.ColorYellow). + SetDynamicColors(true) + + deadline := time.Now().Add(timeout) + updateCountdown := func() { + left := time.Until(deadline) + if left < 0 { + left = 0 + } + countdownText.SetText(fmt.Sprintf("Auto-skip in %ds (default: %s)", int(left.Seconds()), noLabel)) + } + updateCountdown() + + form := components.NewForm(app) + form.SetOnSubmit(func(values map[string]string) error { + result = true + return nil + }) + form.SetOnCancel(func() { + cancelled = true + }) + form.AddSubmitButton(yesLabel) + form.AddCancelButton(noLabel) + enableFormNavigation(form, nil) + + content := tview.NewFlex(). + SetDirection(tview.FlexRow). + AddItem(infoText, 0, 1, false). + AddItem(countdownText, 1, 0, false). + AddItem(form.Form, 3, 0, true) + + page := buildRestoreWizardPage(title, configPath, buildSig, content) + form.SetParentView(page) + + stopCh := make(chan struct{}) + defer close(stopCh) + + if timeout > 0 { + go func() { + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + for { + select { + case <-stopCh: + return + case <-ctx.Done(): + cancelled = true + app.Stop() + return + case <-ticker.C: + left := time.Until(deadline) + if left <= 0 { + timedOut = true + cancelled = true + app.Stop() + return + } + app.QueueUpdateDraw(func() { updateCountdown() }) + } + } + }() + } + + if err := app.SetRoot(page, true).SetFocus(form.Form).Run(); err != nil { + return false, err + } + if timedOut { + logging.DebugStep(logger, "prompt yes/no (tui)", "Timeout expired (%s): proceeding with No", timeout) + return false, nil + } + if cancelled { + return false, nil + } + return result, nil +} + func promptOkTUI(title, configPath, buildSig, message, okLabel string) error { app := newTUIApp() diff --git a/internal/orchestrator/restore_workflow_abort_test.go b/internal/orchestrator/restore_workflow_abort_test.go new file mode 100644 index 0000000..329daa1 --- /dev/null +++ b/internal/orchestrator/restore_workflow_abort_test.go @@ -0,0 +1,143 @@ +package orchestrator + +import ( + "bufio" + "context" + "os" + "path/filepath" + "testing" + "time" + + "github.com/tis24dev/proxsave/internal/backup" + "github.com/tis24dev/proxsave/internal/config" + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +func TestRunRestoreWorkflow_FstabPromptInputAborted_AbortsWorkflow(t *testing.T) { + origRestoreFS := restoreFS + origRestoreCmd := restoreCmd + origRestorePrompter := restorePrompter + origRestoreSystem := restoreSystem + origRestoreTime := restoreTime + origCompatFS := compatFS + 
origPrepare := prepareDecryptedBackupFunc + origSafetyFS := safetyFS + origSafetyNow := safetyNow + t.Cleanup(func() { + restoreFS = origRestoreFS + restoreCmd = origRestoreCmd + restorePrompter = origRestorePrompter + restoreSystem = origRestoreSystem + restoreTime = origRestoreTime + compatFS = origCompatFS + prepareDecryptedBackupFunc = origPrepare + safetyFS = origSafetyFS + safetyNow = origSafetyNow + }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + compatFS = fakeFS + safetyFS = fakeFS + + fakeNow := &FakeTime{Current: time.Date(2020, 1, 2, 3, 4, 5, 0, time.UTC)} + restoreTime = fakeNow + safetyNow = fakeNow.Now + + restoreSystem = fakeSystemDetector{systemType: SystemTypePVE} + restoreCmd = runOnlyRunner{} + + // Make compatibility detection treat this as PVE. + if err := fakeFS.AddFile("/usr/bin/qm", []byte("x")); err != nil { + t.Fatalf("fakeFS.AddFile: %v", err) + } + + currentFstab := "UUID=root / ext4 defaults 0 1\nUUID=swap none swap sw 0 0\n" + if err := fakeFS.WriteFile("/etc/fstab", []byte(currentFstab), 0o640); err != nil { + t.Fatalf("fakeFS.WriteFile(/etc/fstab): %v", err) + } + + backupFstab := "UUID=root / ext4 defaults 0 1\nUUID=swap none swap sw 0 0\n192.168.1.246:/volume1/ProxmoxNFS /mnt/Synology_NFS nfs defaults 0 0\n" + tmpTar := filepath.Join(t.TempDir(), "bundle.tar") + if err := writeTarFile(tmpTar, map[string]string{ + "etc/fstab": backupFstab, + }); err != nil { + t.Fatalf("writeTarFile: %v", err) + } + tarBytes, err := os.ReadFile(tmpTar) + if err != nil { + t.Fatalf("os.ReadFile: %v", err) + } + if err := fakeFS.WriteFile("/bundle.tar", tarBytes, 0o640); err != nil { + t.Fatalf("fakeFS.WriteFile(/bundle.tar): %v", err) + } + + restorePrompter = fakeRestorePrompter{ + mode: RestoreModeCustom, + categories: []Category{mustCategoryByID(t, "filesystem")}, + confirmed: true, + } + + prepareDecryptedBackupFunc = func(ctx context.Context, reader *bufio.Reader, cfg *config.Config, logger *logging.Logger, version string, requireEncrypted bool) (*decryptCandidate, *preparedBundle, error) { + cand := &decryptCandidate{ + DisplayBase: "test", + Manifest: &backup.Manifest{ + CreatedAt: fakeNow.Now(), + ClusterMode: "standalone", + ProxmoxType: "pve", + ScriptVersion: "vtest", + }, + } + prepared := &preparedBundle{ + ArchivePath: "/bundle.tar", + Manifest: backup.Manifest{ArchivePath: "/bundle.tar"}, + cleanup: func() {}, + } + return cand, prepared, nil + } + + // Simulate Ctrl+C behavior: stdin closed -> input.ErrInputAborted during the fstab prompt. 
+ oldIn := os.Stdin + oldOut := os.Stdout + oldErr := os.Stderr + t.Cleanup(func() { + os.Stdin = oldIn + os.Stdout = oldOut + os.Stderr = oldErr + }) + inR, inW, err := os.Pipe() + if err != nil { + t.Fatalf("os.Pipe: %v", err) + } + out, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0o666) + if err != nil { + _ = inR.Close() + _ = inW.Close() + t.Fatalf("OpenFile(%s): %v", os.DevNull, err) + } + errOut, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0o666) + if err != nil { + _ = inR.Close() + _ = inW.Close() + _ = out.Close() + t.Fatalf("OpenFile(%s): %v", os.DevNull, err) + } + os.Stdin = inR + os.Stdout = out + os.Stderr = errOut + t.Cleanup(func() { + _ = inR.Close() + _ = out.Close() + _ = errOut.Close() + }) + _ = inW.Close() + + logger := logging.New(types.LogLevelError, false) + cfg := &config.Config{BaseDir: "/base"} + + if err := RunRestoreWorkflow(context.Background(), cfg, logger, "vtest"); err != ErrRestoreAborted { + t.Fatalf("err=%v; want %v", err, ErrRestoreAborted) + } +} diff --git a/internal/orchestrator/restore_workflow_warnings_test.go b/internal/orchestrator/restore_workflow_warnings_test.go new file mode 100644 index 0000000..b7aeb1e --- /dev/null +++ b/internal/orchestrator/restore_workflow_warnings_test.go @@ -0,0 +1,174 @@ +package orchestrator + +import ( + "bufio" + "context" + "errors" + "io/fs" + "os" + "path/filepath" + "testing" + "time" + + "github.com/tis24dev/proxsave/internal/backup" + "github.com/tis24dev/proxsave/internal/config" + "github.com/tis24dev/proxsave/internal/logging" + "github.com/tis24dev/proxsave/internal/types" +) + +type failWritePathFS struct { + FS + failPath string + failErr error +} + +func (f failWritePathFS) WriteFile(path string, data []byte, perm fs.FileMode) error { + if filepath.Clean(path) == filepath.Clean(f.failPath) { + if f.failErr != nil { + return f.failErr + } + return errors.New("injected write failure") + } + return f.FS.WriteFile(path, data, perm) +} + +func TestRunRestoreWorkflow_FstabMergeFails_ContinuesWithWarnings(t *testing.T) { + origRestoreFS := restoreFS + origRestoreCmd := restoreCmd + origRestorePrompter := restorePrompter + origRestoreSystem := restoreSystem + origRestoreTime := restoreTime + origCompatFS := compatFS + origPrepare := prepareDecryptedBackupFunc + origSafetyFS := safetyFS + origSafetyNow := safetyNow + t.Cleanup(func() { + restoreFS = origRestoreFS + restoreCmd = origRestoreCmd + restorePrompter = origRestorePrompter + restoreSystem = origRestoreSystem + restoreTime = origRestoreTime + compatFS = origCompatFS + prepareDecryptedBackupFunc = origPrepare + safetyFS = origSafetyFS + safetyNow = origSafetyNow + }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + compatFS = fakeFS + safetyFS = fakeFS + + fakeNow := &FakeTime{Current: time.Date(2020, 1, 2, 3, 4, 5, 0, time.UTC)} + restoreTime = fakeNow + safetyNow = fakeNow.Now + + restoreSystem = fakeSystemDetector{systemType: SystemTypePVE} + restoreCmd = runOnlyRunner{} + + // Make compatibility detection treat this as PVE. 
+ if err := fakeFS.AddFile("/usr/bin/qm", []byte("x")); err != nil { + t.Fatalf("fakeFS.AddFile: %v", err) + } + + currentFstab := "UUID=root / ext4 defaults 0 1\nUUID=swap none swap sw 0 0\n" + if err := fakeFS.WriteFile("/etc/fstab", []byte(currentFstab), 0o640); err != nil { + t.Fatalf("fakeFS.WriteFile(/etc/fstab): %v", err) + } + + backupFstab := "UUID=root / ext4 defaults 0 1\nUUID=swap none swap sw 0 0\n192.168.1.246:/volume1/ProxmoxNFS /mnt/Synology_NFS nfs defaults 0 0\n" + tmpTar := filepath.Join(t.TempDir(), "bundle.tar") + if err := writeTarFile(tmpTar, map[string]string{ + "etc/fstab": backupFstab, + }); err != nil { + t.Fatalf("writeTarFile: %v", err) + } + tarBytes, err := os.ReadFile(tmpTar) + if err != nil { + t.Fatalf("os.ReadFile: %v", err) + } + if err := fakeFS.WriteFile("/bundle.tar", tarBytes, 0o640); err != nil { + t.Fatalf("fakeFS.WriteFile(/bundle.tar): %v", err) + } + + restorePrompter = fakeRestorePrompter{ + mode: RestoreModeCustom, + categories: []Category{mustCategoryByID(t, "filesystem")}, + confirmed: true, + } + + prepareDecryptedBackupFunc = func(ctx context.Context, reader *bufio.Reader, cfg *config.Config, logger *logging.Logger, version string, requireEncrypted bool) (*decryptCandidate, *preparedBundle, error) { + cand := &decryptCandidate{ + DisplayBase: "test", + Manifest: &backup.Manifest{ + CreatedAt: fakeNow.Now(), + ClusterMode: "standalone", + ProxmoxType: "pve", + ScriptVersion: "vtest", + }, + } + prepared := &preparedBundle{ + ArchivePath: "/bundle.tar", + Manifest: backup.Manifest{ArchivePath: "/bundle.tar"}, + cleanup: func() {}, + } + return cand, prepared, nil + } + + // Inject a non-abort error during applyFstabMerge backup write. + restoreFS = failWritePathFS{ + FS: fakeFS, + failPath: "/etc/fstab.bak-20200102-030405", + failErr: errors.New("disk full"), + } + + // Provide input to accept defaultYes (blank line). + oldIn := os.Stdin + oldOut := os.Stdout + oldErr := os.Stderr + t.Cleanup(func() { + os.Stdin = oldIn + os.Stdout = oldOut + os.Stderr = oldErr + }) + inR, inW, err := os.Pipe() + if err != nil { + t.Fatalf("os.Pipe: %v", err) + } + out, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0o666) + if err != nil { + _ = inR.Close() + _ = inW.Close() + t.Fatalf("OpenFile(%s): %v", os.DevNull, err) + } + errOut, err := os.OpenFile(os.DevNull, os.O_WRONLY, 0o666) + if err != nil { + _ = inR.Close() + _ = inW.Close() + _ = out.Close() + t.Fatalf("OpenFile(%s): %v", os.DevNull, err) + } + os.Stdin = inR + os.Stdout = out + os.Stderr = errOut + t.Cleanup(func() { + _ = inR.Close() + _ = out.Close() + _ = errOut.Close() + }) + if _, err := inW.WriteString("\n"); err != nil { + t.Fatalf("WriteString: %v", err) + } + _ = inW.Close() + + logger := logging.New(types.LogLevelWarning, false) + cfg := &config.Config{BaseDir: "/base"} + + if err := RunRestoreWorkflow(context.Background(), cfg, logger, "vtest"); err != nil { + t.Fatalf("RunRestoreWorkflow error: %v", err) + } + if !logger.HasWarnings() { + t.Fatalf("expected warnings") + } +} From c0ee0248be6683d25b791b39ba1b349a1f6c7746 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 22 Jan 2026 14:34:51 +0100 Subject: [PATCH 05/10] Add network rollback countdown and abort info display Implements a countdown display for network rollback when a restore is aborted, providing detailed status and IP information to the user. 
Adds extraction of original IP from network snapshots, extends error and abort info structures to include rollback marker and deadline, and ensures abort info is available for display after restore aborts. Also improves logging and messaging for network rollback events. --- cmd/proxsave/main.go | 109 ++++++++++++++++++++++++- internal/orchestrator/.backup.lock | 4 +- internal/orchestrator/network_apply.go | 98 ++++++++++++++++++++-- internal/orchestrator/restore.go | 31 +++++++ 4 files changed, 230 insertions(+), 12 deletions(-) diff --git a/cmd/proxsave/main.go b/cmd/proxsave/main.go index 4e9fc18..45881fa 100644 --- a/cmd/proxsave/main.go +++ b/cmd/proxsave/main.go @@ -603,6 +603,15 @@ func run() int { } }() + // Defer for network rollback countdown (LIFO: executes BEFORE footer) + defer func() { + if finalExitCode == exitCodeInterrupted { + if abortInfo := orchestrator.GetLastRestoreAbortInfo(); abortInfo != nil { + printNetworkRollbackCountdown(abortInfo) + } + } + }() + defer func() { if !args.Support || pendingSupportStats == nil { return @@ -746,7 +755,7 @@ func run() int { logging.Info("Restore mode enabled - starting CLI workflow...") if err := orchestrator.RunRestoreWorkflow(ctx, cfg, logger, toolVersion); err != nil { if errors.Is(err, orchestrator.ErrRestoreAborted) { - logging.Info("Restore workflow aborted by user") + logging.Warning("Restore workflow aborted by user") if args.Support { pendingSupportStats = support.BuildSupportStats(logger, resolveHostname(), envInfo.Type, envInfo.Version, toolVersion, startTime, time.Now(), exitCodeInterrupted, "restore") } @@ -776,7 +785,7 @@ func run() int { } if err := orchestrator.RunRestoreWorkflowTUI(ctx, cfg, logger, toolVersion, args.ConfigPath, sig); err != nil { if errors.Is(err, orchestrator.ErrRestoreAborted) || errors.Is(err, orchestrator.ErrDecryptAborted) { - logging.Info("Restore workflow aborted by user") + logging.Warning("Restore workflow aborted by user") if args.Support { pendingSupportStats = support.BuildSupportStats(logger, resolveHostname(), envInfo.Type, envInfo.Version, toolVersion, startTime, time.Now(), exitCodeInterrupted, "restore") } @@ -1382,6 +1391,102 @@ func run() int { return finalExitCode } +const rollbackCountdownDisplayDuration = 10 * time.Second + +func printNetworkRollbackCountdown(abortInfo *orchestrator.RestoreAbortInfo) { + if abortInfo == nil { + return + } + + color := "\033[33m" // yellow + colorReset := "\033[0m" + + markerExists := false + if strings.TrimSpace(abortInfo.NetworkRollbackMarker) != "" { + if _, err := os.Stat(strings.TrimSpace(abortInfo.NetworkRollbackMarker)); err == nil { + markerExists = true + } + } + + status := "UNKNOWN" + switch { + case markerExists: + status = "ARMED (will execute automatically)" + case !abortInfo.RollbackDeadline.IsZero() && time.Now().After(abortInfo.RollbackDeadline): + status = "EXECUTED (marker removed)" + case strings.TrimSpace(abortInfo.NetworkRollbackMarker) != "": + status = "DISARMED/CLEARED (marker removed before deadline)" + case abortInfo.NetworkRollbackArmed: + status = "ARMED (status from snapshot)" + default: + status = "NOT ARMED" + } + + fmt.Println() + fmt.Printf("%s===========================================\n", color) + fmt.Printf("NETWORK ROLLBACK%s\n", colorReset) + fmt.Println() + + // Static info + fmt.Printf(" Status: %s\n", status) + if strings.TrimSpace(abortInfo.OriginalIP) != "" && abortInfo.OriginalIP != "unknown" { + fmt.Printf(" Pre-apply IP (from snapshot): %s\n", strings.TrimSpace(abortInfo.OriginalIP)) + } + if 
strings.TrimSpace(abortInfo.CurrentIP) != "" && abortInfo.CurrentIP != "unknown" { + fmt.Printf(" Post-apply IP (observed): %s\n", strings.TrimSpace(abortInfo.CurrentIP)) + } + if strings.TrimSpace(abortInfo.NetworkRollbackLog) != "" { + fmt.Printf(" Rollback log: %s\n", strings.TrimSpace(abortInfo.NetworkRollbackLog)) + } + fmt.Println() + + switch { + case markerExists && !abortInfo.RollbackDeadline.IsZero() && time.Until(abortInfo.RollbackDeadline) > 0: + fmt.Println("Connection will be temporarily interrupted during restore.") + if strings.TrimSpace(abortInfo.OriginalIP) != "" && abortInfo.OriginalIP != "unknown" { + fmt.Printf("Remember to reconnect using the pre-apply IP: %s\n", strings.TrimSpace(abortInfo.OriginalIP)) + } + case !markerExists && !abortInfo.RollbackDeadline.IsZero() && time.Now().After(abortInfo.RollbackDeadline): + if strings.TrimSpace(abortInfo.OriginalIP) != "" && abortInfo.OriginalIP != "unknown" { + fmt.Printf("Rollback executed: reconnect using the pre-apply IP: %s\n", strings.TrimSpace(abortInfo.OriginalIP)) + } + case !markerExists && strings.TrimSpace(abortInfo.NetworkRollbackMarker) != "": + if strings.TrimSpace(abortInfo.CurrentIP) != "" && abortInfo.CurrentIP != "unknown" { + fmt.Printf("Rollback will NOT run: reconnect using the post-apply IP: %s\n", strings.TrimSpace(abortInfo.CurrentIP)) + } + } + + // Live countdown for max 10 seconds (only when rollback is still armed). + if !markerExists || abortInfo.RollbackDeadline.IsZero() { + fmt.Printf("%s===========================================%s\n", color, colorReset) + return + } + + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + displayEnd := time.Now().Add(rollbackCountdownDisplayDuration) + + for { + remaining := time.Until(abortInfo.RollbackDeadline) + if remaining <= 0 { + fmt.Printf("\r Remaining: Rollback executing now... 
\n") + break + } + if time.Now().After(displayEnd) { + fmt.Printf("\r Remaining: %ds (exiting, rollback will proceed)\n", int(remaining.Seconds())) + break + } + fmt.Printf("\r Remaining: %ds ", int(remaining.Seconds())) + + select { + case <-ticker.C: + continue + } + } + + fmt.Printf("%s===========================================%s\n", color, colorReset) +} + func printFinalSummary(finalExitCode int) { fmt.Println() diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index 09a6350..c2e1113 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=2067929 +pid=2228394 host=pve -time=2026-01-21T10:24:39+01:00 +time=2026-01-21T16:03:32+01:00 diff --git a/internal/orchestrator/network_apply.go b/internal/orchestrator/network_apply.go index f0737db..22de576 100644 --- a/internal/orchestrator/network_apply.go +++ b/internal/orchestrator/network_apply.go @@ -5,6 +5,7 @@ import ( "context" "errors" "fmt" + "net" "os" "os/exec" "path/filepath" @@ -20,9 +21,12 @@ const defaultNetworkRollbackTimeout = 180 * time.Second var ErrNetworkApplyNotCommitted = errors.New("network configuration not committed") type NetworkApplyNotCommittedError struct { - RollbackLog string - RestoredIP string - RollbackArmed bool + RollbackLog string + RollbackMarker string + RestoredIP string + OriginalIP string // IP from backup file (will be restored by rollback) + RollbackArmed bool + RollbackDeadline time.Time // when rollback will execute } func (e *NetworkApplyNotCommittedError) Error() string { @@ -444,6 +448,64 @@ func applyNetworkWithRollbackCLI(ctx context.Context, reader *bufio.Reader, logg return buildNetworkApplyNotCommittedError(ctx, logger, iface, handle) } +// extractIPFromSnapshot reads the IP address for a given interface from a network snapshot report file. +// It searches the output section that follows the "$ ip -br addr" command written by writeNetworkSnapshot. +func extractIPFromSnapshot(path, iface string) string { + if strings.TrimSpace(path) == "" || strings.TrimSpace(iface) == "" { + return "unknown" + } + data, err := restoreFS.ReadFile(path) + if err != nil { + return "unknown" + } + + lines := strings.Split(string(data), "\n") + inAddrSection := false + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "$ ip -br addr" { + inAddrSection = true + continue + } + if strings.HasPrefix(line, "$ ") { + if inAddrSection { + break + } + continue + } + if !inAddrSection || line == "" || strings.HasPrefix(line, "ERROR:") { + continue + } + + fields := strings.Fields(line) + if len(fields) < 3 || fields[0] != iface { + continue + } + + // "ip -br addr" can print multiple addresses; prefer IPv4 when available. 
+ firstIPv6 := "" + for _, token := range fields[2:] { + ip := strings.Split(token, "/")[0] + parsed := net.ParseIP(ip) + if parsed == nil { + continue + } + if parsed.To4() != nil { + return ip + } + if firstIPv6 == "" { + firstIPv6 = ip + } + } + if firstIPv6 != "" { + return firstIPv6 + } + return "unknown" + } + + return "unknown" +} + func buildNetworkApplyNotCommittedError(ctx context.Context, logger *logging.Logger, iface string, handle *networkRollbackHandle) *NetworkApplyNotCommittedError { logging.DebugStep(logger, "build not-committed error", "Start: iface=%s handle=%v", iface, handle != nil) @@ -474,15 +536,31 @@ func buildNetworkApplyNotCommittedError(ctx context.Context, logger *logging.Log } rollbackLog := "" + rollbackMarker := "" + originalIP := "unknown" + var rollbackDeadline time.Time if handle != nil { rollbackLog = strings.TrimSpace(handle.logPath) + rollbackMarker = strings.TrimSpace(handle.markerPath) + // Read original IP from before.txt snapshot (IP that will be restored by rollback) + if strings.TrimSpace(handle.workDir) != "" { + beforePath := filepath.Join(handle.workDir, "before.txt") + originalIP = extractIPFromSnapshot(beforePath, iface) + logging.DebugStep(logger, "build not-committed error", "Original IP from %s: %s", beforePath, originalIP) + } + // Calculate rollback deadline + rollbackDeadline = handle.armedAt.Add(handle.timeout) + logging.DebugStep(logger, "build not-committed error", "Rollback deadline: %s", rollbackDeadline.Format(time.RFC3339)) } - logging.DebugStep(logger, "build not-committed error", "Result: ip=%s armed=%v log=%s", restoredIP, rollbackArmed, rollbackLog) + logging.DebugStep(logger, "build not-committed error", "Result: ip=%s originalIP=%s armed=%v log=%s", restoredIP, originalIP, rollbackArmed, rollbackLog) return &NetworkApplyNotCommittedError{ - RollbackLog: rollbackLog, - RestoredIP: strings.TrimSpace(restoredIP), - RollbackArmed: rollbackArmed, + RollbackLog: rollbackLog, + RollbackMarker: rollbackMarker, + RestoredIP: strings.TrimSpace(restoredIP), + OriginalIP: originalIP, + RollbackArmed: rollbackArmed, + RollbackDeadline: rollbackDeadline, } } @@ -1099,8 +1177,12 @@ func buildRollbackScript(markerPath, backupPath, logPath string, restartNetworki } lines = append(lines, + `echo "[DEBUG] Removing marker file..." >> "$LOG"`, `rm -f "$MARKER"`, - `echo "Rollback finished at $(date -Is)" >> "$LOG"`, + `echo "[INFO] ========================================" >> "$LOG"`, + `echo "[INFO] NETWORK ROLLBACK SCRIPT FINISHED" >> "$LOG"`, + `echo "[INFO] Timestamp: $(date -Is)" >> "$LOG"`, + `echo "[INFO] ========================================" >> "$LOG"`, ) return strings.Join(lines, "\n") + "\n" } diff --git a/internal/orchestrator/restore.go b/internal/orchestrator/restore.go index 0e2a2a8..d50d267 100644 --- a/internal/orchestrator/restore.go +++ b/internal/orchestrator/restore.go @@ -38,6 +38,28 @@ var ( prepareDecryptedBackupFunc = prepareDecryptedBackup ) +// RestoreAbortInfo contains information about an aborted restore with network rollback. +type RestoreAbortInfo struct { + NetworkRollbackArmed bool + NetworkRollbackLog string + NetworkRollbackMarker string + OriginalIP string // IP from backup file (will be restored by rollback) + CurrentIP string // IP after apply (before rollback) + RollbackDeadline time.Time // when rollback will execute +} + +var lastRestoreAbortInfo *RestoreAbortInfo + +// GetLastRestoreAbortInfo returns info about the last restore abort, if any. 
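+// It is populated by RunRestoreWorkflow when a network apply is not committed
+// and consumed by the CLI footer to print the rollback countdown.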
+func GetLastRestoreAbortInfo() *RestoreAbortInfo { + return lastRestoreAbortInfo +} + +// ClearRestoreAbortInfo clears the stored abort info. +func ClearRestoreAbortInfo() { + lastRestoreAbortInfo = nil +} + func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging.Logger, version string) (err error) { if cfg == nil { return fmt.Errorf("configuration not available") @@ -498,6 +520,15 @@ func RunRestoreWorkflow(ctx context.Context, cfg *config.Config, logger *logging } rollbackLog = strings.TrimSpace(notCommitted.RollbackLog) rollbackArmed = notCommitted.RollbackArmed + // Save abort info for footer display + lastRestoreAbortInfo = &RestoreAbortInfo{ + NetworkRollbackArmed: rollbackArmed, + NetworkRollbackLog: rollbackLog, + NetworkRollbackMarker: strings.TrimSpace(notCommitted.RollbackMarker), + OriginalIP: notCommitted.OriginalIP, + CurrentIP: observedIP, + RollbackDeadline: notCommitted.RollbackDeadline, + } } if rollbackArmed { logger.Warning("Network apply not committed; rollback is ARMED and will run automatically. Current IP: %s", observedIP) From 8354e00d2abf7653c93ec8e1f9139e86e022d0b7 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 22 Jan 2026 14:38:00 +0100 Subject: [PATCH 06/10] Normalize cloud log path handling in CloudStorage Introduces the cloudLogBase helper to consistently normalize cloud log paths, ensuring remote prefixes are applied when needed. Updates countLogFiles and deleteAssociatedLog to use the normalized path, and adds tests to verify correct behavior when CloudLogPath is specified without a remote. --- internal/storage/cloud.go | 20 +++++++++-- internal/storage/cloud_test.go | 61 ++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/internal/storage/cloud.go b/internal/storage/cloud.go index 1b5036e..e7cd189 100644 --- a/internal/storage/cloud.go +++ b/internal/storage/cloud.go @@ -1271,6 +1271,10 @@ func (c *CloudStorage) deleteAssociatedLog(ctx context.Context, backupFile strin if base == "" { return false } + baseForWarning := c.cloudLogBase(base) + if baseForWarning == "" { + baseForWarning = base + } host, ts, ok := extractLogKeyFromBackup(backupFile) if !ok { @@ -1296,7 +1300,7 @@ func (c *CloudStorage) deleteAssociatedLog(ctx context.Context, backupFile strin msg := strings.TrimSpace(string(output)) if isRcloneObjectNotFound(msg) || isRcloneObjectNotFound(err.Error()) { if strings.Contains(strings.ToLower(msg), "directory") { - c.markCloudLogPathMissing(base, msg) + c.markCloudLogPathMissing(baseForWarning, msg) } else { c.logger.Debug("Cloud logs: log already removed %s (%s)", cloudPath, msg) } @@ -1317,7 +1321,7 @@ func (c *CloudStorage) countLogFiles(ctx context.Context) int { if c == nil || c.config == nil { return -1 } - base := strings.TrimSpace(c.config.CloudLogPath) + base := c.cloudLogBase(c.config.CloudLogPath) if base == "" { return 0 } @@ -1357,6 +1361,18 @@ func (c *CloudStorage) countLogFiles(ctx context.Context) int { return count } +func (c *CloudStorage) cloudLogBase(basePath string) string { + base := strings.TrimSpace(basePath) + if base == "" { + return "" + } + if !strings.Contains(base, ":") && c != nil && strings.TrimSpace(c.remote) != "" { + base = strings.TrimSpace(c.remote) + ":" + base + } + base = strings.TrimRight(base, "/") + return base +} + // cloudLogPath builds the full cloud log path, mirroring buildCloudLogDestination logic. // Supports both new style (/path) and legacy style (remote:/path). 
// If basePath doesn't contain ":", uses c.remote as the remote name. diff --git a/internal/storage/cloud_test.go b/internal/storage/cloud_test.go index 2a394f1..8e091c4 100644 --- a/internal/storage/cloud_test.go +++ b/internal/storage/cloud_test.go @@ -575,6 +575,67 @@ func TestCloudStorageSkipsCloudLogsWhenPathMissing(t *testing.T) { } } +func TestCloudStorageCountLogFiles_NormalizesPathOnlyCloudLogPath(t *testing.T) { + cfg := &config.Config{ + CloudEnabled: true, + CloudRemote: "remote:base-prefix", + CloudLogPath: "/logs/", + RcloneRetries: 1, + } + cs := newCloudStorageForTest(cfg) + queue := &commandQueue{ + t: t, + queue: []queuedResponse{ + { + name: "rclone", + args: []string{"lsf", "remote:/logs", "--files-only"}, + out: "backup-host-20250101-010101.log\n", + }, + }, + } + cs.execCommand = queue.exec + + if got := cs.countLogFiles(context.Background()); got != 1 { + t.Fatalf("countLogFiles() = %d; want 1", got) + } +} + +func TestCloudStorageSkipsCloudLogsWhenPathOnlyLogDirMissing(t *testing.T) { + cfg := &config.Config{ + CloudEnabled: true, + CloudRemote: "remote", + CloudLogPath: "/logs", + RcloneRetries: 1, + } + cs := newCloudStorageForTest(cfg) + queue := &commandQueue{ + t: t, + queue: []queuedResponse{ + { + name: "rclone", + args: []string{"lsf", "remote:/logs", "--files-only"}, + out: "2025/11/16 22:11:47 ERROR : remote:/logs: directory not found", + err: errors.New("exit status 3"), + }, + }, + } + cs.execCommand = queue.exec + + if got := cs.countLogFiles(context.Background()); got != -1 { + t.Fatalf("countLogFiles() = %d; want -1", got) + } + if len(queue.calls) != 1 { + t.Fatalf("expected 1 rclone call, got %d", len(queue.calls)) + } + + if got := cs.countLogFiles(context.Background()); got != -1 { + t.Fatalf("countLogFiles() second call = %d; want -1", got) + } + if len(queue.calls) != 1 { + t.Fatalf("expected no additional rclone calls, got %d", len(queue.calls)) + } +} + func TestCloudStorageRemoteHelpersAndBuildArgs(t *testing.T) { cfg := &config.Config{ CloudEnabled: true, From ed8fd1bfa618a1f47b6744af492fc2508503554d Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 22 Jan 2026 14:43:02 +0100 Subject: [PATCH 07/10] Clarify cloud path formats and network rollback status Improved documentation for cloud storage configuration, recommending consistent path-only formats for remote and log paths. Added detailed explanations and examples for network rollback status in restore guide and troubleshooting, including footer output and reconnection instructions after SSH/IP changes. --- docs/CLOUD_STORAGE.md | 29 +++++++++++++++-- docs/CONFIGURATION.md | 20 +++++++++++- docs/RESTORE_GUIDE.md | 70 +++++++++++++++++++++++++++++++++++++++++ docs/TROUBLESHOOTING.md | 17 ++++++++++ 4 files changed, 132 insertions(+), 4 deletions(-) diff --git a/docs/CLOUD_STORAGE.md b/docs/CLOUD_STORAGE.md index 8b1800a..d68590d 100644 --- a/docs/CLOUD_STORAGE.md +++ b/docs/CLOUD_STORAGE.md @@ -339,7 +339,7 @@ This is sufficient to start! Other options use sensible defaults. 
# Cloud storage CLOUD_ENABLED=true CLOUD_REMOTE=GoogleDrive -CLOUD_REMOTE_PATH=/proxsave/backup # Complete folder path inside the remote +CLOUD_REMOTE_PATH=/proxsave/backup # Folder path inside the remote CLOUD_LOG_PATH=/proxsave/log # Optional: log folder inside the same remote # Upload mode @@ -377,8 +377,8 @@ RETENTION_YEARLY=3 |----------|---------|-------------| | `CLOUD_ENABLED` | `false` | Enable cloud storage | | `CLOUD_REMOTE` | _(required)_ | rclone remote **name** from `rclone config` (legacy `remote:path` still supported) | -| `CLOUD_REMOTE_PATH` | _(empty)_ | Full folder path/prefix inside the remote (e.g., `/proxsave/backup`) | -| `CLOUD_LOG_PATH` | _(empty)_ | Optional log folder on the same remote (set `remote:/path` only when using a different remote) | +| `CLOUD_REMOTE_PATH` | _(empty)_ | Folder path/prefix inside the remote (e.g., `/proxsave/backup`) | +| `CLOUD_LOG_PATH` | _(empty)_ | Optional log folder (recommended: path-only on the same remote; use `otherremote:/path` only when using a different remote) | | `CLOUD_UPLOAD_MODE` | `parallel` | `parallel` or `sequential` | | `CLOUD_PARALLEL_MAX_JOBS` | `2` | Max concurrent uploads (parallel mode) | | `CLOUD_PARALLEL_VERIFICATION` | `true` | Verify checksums after upload | @@ -395,6 +395,29 @@ RETENTION_YEARLY=3 For complete configuration reference, see: **[Configuration Guide](CONFIGURATION.md)** +### Recommended Remote Path Formats (Important) + +ProxSave supports both “new style” (path-only) and “legacy style” (`remote:path`) values, but using a consistent format avoids confusion. + +**Recommended:** +- `CLOUD_REMOTE` should be just the **remote name** (no `:`), e.g. `nextcloud` or `GoogleDrive`. +- `CLOUD_REMOTE_PATH` should be a **path inside the remote** (no remote prefix). Use **no trailing slash**. A leading `/` is accepted. +- `CLOUD_LOG_PATH` should be a **folder path** for logs. When logs are stored on the **same remote**, prefer **path-only** here too (no remote prefix). Use `otherremote:/path` only if logs must go to a different remote than `CLOUD_REMOTE`. 
+ +**Examples (same remote):** +```bash +CLOUD_REMOTE=nextcloud-katerasrael +CLOUD_REMOTE_PATH=B+K/BACKUP/marcellus +CLOUD_LOG_PATH=B+K/BACKUP/marcellus/logs +``` + +**Examples (different remotes for backups vs logs):** +```bash +CLOUD_REMOTE=nextcloud-backups +CLOUD_REMOTE_PATH=proxsave/backup/host1 +CLOUD_LOG_PATH=nextcloud-logs:proxsave/log/host1 +``` + ### Understanding CLOUD_REMOTE vs CLOUD_REMOTE_PATH **How CLOUD_REMOTE and CLOUD_REMOTE_PATH work together** diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index eb0d295..ab6c45a 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -425,7 +425,11 @@ CLOUD_ENABLED=false # true | false CLOUD_REMOTE=GoogleDrive # remote name from `rclone config` CLOUD_REMOTE_PATH=/proxsave/backup # folder path inside the remote -# Cloud log path (same remote, optional) +# Cloud log path (optional) +# Recommended (same remote): path-only (no remote prefix) and no trailing slash +# CLOUD_LOG_PATH=proxsave/log +# Legacy / different remote: explicit remote:path +# CLOUD_LOG_PATH=OtherRemote:proxsave/log CLOUD_LOG_PATH=/proxsave/log # leave empty to disable log uploads # Legacy compatibility (still supported): @@ -446,6 +450,20 @@ CLOUD_PARALLEL_VERIFICATION=true # true | false CLOUD_WRITE_HEALTHCHECK=false # true | false (auto-fallback mode vs force write test) ``` +### Recommended Remote Path Formats (Cloud) + +To avoid ambiguity, prefer consistent formats: +- `CLOUD_REMOTE`: remote **name** only (no `:`), e.g. `nextcloud` or `GoogleDrive`. +- `CLOUD_REMOTE_PATH`: path **inside** the remote (no remote prefix), **no trailing slash** (leading `/` is accepted). +- `CLOUD_LOG_PATH`: log **folder path**. When logs are on the same remote as backups, prefer **path-only** here too; use `otherremote:/path` only when logs must go to a different remote. + +Example (same remote for backups + logs): +```bash +CLOUD_REMOTE=nextcloud-katerasrael +CLOUD_REMOTE_PATH=B+K/BACKUP/marcellus +CLOUD_LOG_PATH=B+K/BACKUP/marcellus/logs +``` + ### Connectivity Check Modes | Setting | Mode | Behavior | diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index 51bef58..3713273 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -1715,6 +1715,76 @@ This protects SSH/GUI access during network changes. **Result reporting**: - If you do not type `COMMIT`, ProxSave completes the restore with warnings and reports that the original network settings were restored (including the current IP, when detectable), plus the rollback log path. +#### Ctrl+C footer: `NETWORK ROLLBACK` status + +If you interrupt ProxSave with **Ctrl+C** and a live network apply/rollback timer was involved, the CLI footer can print a `NETWORK ROLLBACK` block with a recommended reconnection IP and the rollback log path. + +The status can be one of: +- **ARMED**: rollback is still pending and will execute automatically at the deadline (a short countdown may be shown). +- **DISARMED/CLEARED**: rollback will **not** run (the marker was removed before the deadline; this can happen if it was manually cleared/disarmed). +- **EXECUTED**: rollback already ran (marker removed after the deadline). + +**Which IP should I use?** +- **ARMED**: prepare to reconnect using the **pre-apply IP** once rollback runs. +- **EXECUTED**: reconnect using the **pre-apply IP** (the system should be back on the previous network config). +- **DISARMED/CLEARED**: reconnect using the **post-apply IP** (the applied config remains active). 
+ +Notes: +- *Pre-apply IP* is derived from the `before.txt` snapshot in `/tmp/proxsave/network_apply_*` and may be `unknown` if it cannot be parsed. +- *Post-apply IP* is what ProxSave could observe on the management interface after applying the new config; it may include CIDR suffixes (for example `10.0.0.4/24`) or multiple addresses. + +**Example outputs** + +The `NETWORK ROLLBACK` block is printed just before the standard ProxSave footer (the footer color reflects the exit status, e.g. magenta on Ctrl+C). + +Example 1 — **ARMED** (rollback pending, countdown shown for a few seconds): +```text +=========================================== +NETWORK ROLLBACK + + Status: ARMED (will execute automatically) + Pre-apply IP (from snapshot): 192.168.1.100 + Post-apply IP (observed): 10.0.0.4/24 + Rollback log: /tmp/proxsave/network_rollback_20260122_153012.log + +Connection will be temporarily interrupted during restore. +Remember to reconnect using the pre-apply IP: 192.168.1.100 + Remaining: 147s +=========================================== + +=========================================== +ProxSave - Go - +=========================================== +``` + +Example 2 — **EXECUTED** (rollback already ran, no countdown): +```text +=========================================== +NETWORK ROLLBACK + + Status: EXECUTED (marker removed) + Pre-apply IP (from snapshot): 192.168.1.100 + Post-apply IP (observed): 10.0.0.4/24 + Rollback log: /tmp/proxsave/network_rollback_20260122_153012.log + +Rollback executed: reconnect using the pre-apply IP: 192.168.1.100 +=========================================== +``` + +Example 3 — **DISARMED/CLEARED** (rollback will not run, applied config remains active): +```text +=========================================== +NETWORK ROLLBACK + + Status: DISARMED/CLEARED (marker removed before deadline) + Pre-apply IP (from snapshot): 192.168.1.100 + Post-apply IP (observed): 10.0.0.4/24 + Rollback log: /tmp/proxsave/network_rollback_20260122_153012.log + +Rollback will NOT run: reconnect using the post-apply IP: 10.0.0.4/24 +=========================================== +``` + ### 5. Smart `/etc/fstab` Merge (Optional) If the restore includes filesystem configuration (notably `/etc/fstab`), ProxSave can run a **smart merge** instead of blindly overwriting your current `fstab`. diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 740d962..4fba9c6 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -553,6 +553,23 @@ MIN_DISK_SPACE_PRIMARY_GB=5 # Lower threshold --- ### 7. Restore Issues +#### Restore drops SSH / IP changes during network restore + +**Symptoms**: +- SSH/Web UI disconnects during restore when the `network` category is applied live +- You see a `NETWORK ROLLBACK` block in the footer (especially after Ctrl+C) + +**Explanation**: +- Live network apply can change IP/routes immediately. +- ProxSave protects access by arming a rollback timer that can revert network-related files automatically if `COMMIT` is not received in time. + +**What to do**: +- Prefer running restore from the **local console/IPMI**, not over SSH. +- If the footer says **ARMED**, reconnect using the **pre-apply IP** once rollback runs. +- If it says **EXECUTED**, reconnect using the **pre-apply IP** (rollback already ran). +- If it says **DISARMED/CLEARED**, reconnect using the **post-apply IP** (new config remains active). +- Check the rollback log path printed in the footer for details. 
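+
+From a second session you can follow the rollback log the footer points to. The path below is illustrative (taken from the examples above); the real path is printed at runtime:
+
+```bash
+tail -f /tmp/proxsave/network_rollback_20260122_153012.log
+```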
+ #### Error during network preflight: `addr_add_dry_run() got an unexpected keyword argument 'nodad'` **Symptoms**: From a74c679b89fa649bf2a45b015c66a91e5605302b Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 22 Jan 2026 15:22:54 +0100 Subject: [PATCH 08/10] Refine cluster detection and BACKUP_CLUSTER_CONFIG behavior Improves cluster detection logic to require both multiple node directories and a valid corosync config, preventing false positives on standalone nodes. BACKUP_CLUSTER_CONFIG now controls both cluster config and runtime command collection (pvecm status/nodes, HA status). Documentation and tests updated to clarify and verify the new behavior. --- docs/BACKUP_ENV_MAPPING.md | 6 +- docs/CONFIGURATION.md | 2 +- docs/TROUBLESHOOTING.md | 2 + internal/backup/collector.go | 2 +- internal/backup/collector_pve.go | 82 +++++++++++++++++----- internal/backup/collector_pve_test.go | 61 ++++++++++++++++ internal/backup/collector_pve_util_test.go | 31 +++++++- internal/config/templates/backup.env | 7 +- internal/orchestrator/.backup.lock | 4 +- 9 files changed, 171 insertions(+), 26 deletions(-) diff --git a/docs/BACKUP_ENV_MAPPING.md b/docs/BACKUP_ENV_MAPPING.md index a8c3e17..798bdcb 100644 --- a/docs/BACKUP_ENV_MAPPING.md +++ b/docs/BACKUP_ENV_MAPPING.md @@ -18,7 +18,6 @@ This file documents the mapping between variables from the **old Bash `backup.en AUTO_UPDATE_HASHES = SAME BACKUP_BLACKLIST = SAME BACKUP_CEPH_CONFIG = SAME -BACKUP_CLUSTER_CONFIG = SAME BACKUP_CRITICAL_FILES = SAME BACKUP_INSTALLED_PACKAGES = SAME BACKUP_PVE_BACKUP_FILES = SAME @@ -120,6 +119,11 @@ CLOUD_BACKUP_PATH = SEMANTIC CHANGE → CLOUD_REMOTE_PATH ⚠️ **Go**: `CLOUD_REMOTE="GoogleDrive"` (remote name) + `CLOUD_REMOTE_PATH="/proxsave/backup"` (path within that remote) **Migration**: remove the remote name from the bash variable and move the path into `CLOUD_REMOTE_PATH` +BACKUP_CLUSTER_CONFIG = SEMANTIC CHANGE ⚠️ + **Bash/legacy expectation**: skip only cluster config files. + **Go (current)**: also disables cluster runtime collection (`pvecm status`, `pvecm nodes`, HA status). + **Impact**: if you previously disabled cluster file backup but still wanted runtime status collection, re-enable `BACKUP_CLUSTER_CONFIG` (or introduce a separate runtime flag if needed). + STORAGE_WARNING_THRESHOLD_SECONDARY = SEMANTIC CHANGE → MIN_DISK_SPACE_SECONDARY_GB ⚠️ **Bash**: `STORAGE_WARNING_THRESHOLD_SECONDARY="90"` (90% used = warning) **Go**: `MIN_DISK_SPACE_SECONDARY_GB="10"` (minimum 10GB free required) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index ab6c45a..058574c 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -886,7 +886,7 @@ METRICS_PATH=${BASE_DIR}/metrics # Empty = /var/lib/prometheus/node-exporter ```bash # Cluster configuration -BACKUP_CLUSTER_CONFIG=true # /etc/pve/cluster files +BACKUP_CLUSTER_CONFIG=true # Cluster config + runtime (corosync, pvecm status/nodes, HA status) # PVE firewall rules BACKUP_PVE_FIREWALL=true # PVE firewall configuration diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 4fba9c6..09ade05 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -299,6 +299,8 @@ BACKUP_CLUSTER_CONFIG=false BACKUP_ROOT_HOME=false ``` +> Note: `BACKUP_CLUSTER_CONFIG=false` also skips cluster runtime collection (`pvecm status`, `pvecm nodes`, HA status), which helps avoid non-critical cluster warnings on standalone nodes. 
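+
+To confirm the node really is standalone before disabling it, a quick check (assuming the PVE CLI is installed):
+
+```bash
+pvecm status   # on a standalone node this reports that no corosync/cluster config exists
+```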
+ --- #### Error: `429 Too Many Requests` (API rate limiting) diff --git a/internal/backup/collector.go b/internal/backup/collector.go index a209020..1f62ed5 100644 --- a/internal/backup/collector.go +++ b/internal/backup/collector.go @@ -828,7 +828,7 @@ func (c *Collector) safeCmdOutput(ctx context.Context, cmd, output, description c.incFilesFailed() return fmt.Errorf("critical command `%s` failed for %s: %w (output: %s)", cmdString, description, err, summarizeCommandOutputText(string(out))) } - c.logger.Warning("Skipping %s: command `%s` failed (%v). Non-critical; backup continues. Ensure the PBS CLI is available and has proper permissions. Output: %s", + c.logger.Warning("Skipping %s: command `%s` failed (%v). Non-critical; backup continues. Ensure the required CLI is available and has proper permissions. Output: %s", description, cmdString, err, diff --git a/internal/backup/collector_pve.go b/internal/backup/collector_pve.go index 6324816..2a56e8b 100644 --- a/internal/backup/collector_pve.go +++ b/internal/backup/collector_pve.go @@ -346,7 +346,7 @@ func (c *Collector) collectPVECommands(ctx context.Context, clustered bool) (*pv } // Cluster commands (if clustered) - if clustered { + if clustered && c.config.BackupClusterConfig { c.safeCmdOutput(ctx, "pvecm status", filepath.Join(commandsDir, "cluster_status.txt"), @@ -365,6 +365,8 @@ func (c *Collector) collectPVECommands(ctx context.Context, clustered bool) (*pv filepath.Join(commandsDir, "ha_status.json"), "HA status", false) + } else if clustered && !c.config.BackupClusterConfig { + c.logger.Debug("Skipping cluster runtime commands: BACKUP_CLUSTER_CONFIG=false (clustered=%v)", clustered) } // Storage status @@ -1651,25 +1653,53 @@ func (c *Collector) isClusteredPVE(ctx context.Context) (bool, error) { return false, err } + corosyncPath := c.effectiveCorosyncConfigPath() + corosyncExists := false + if corosyncPath != "" { + if _, err := c.depStat(corosyncPath); err == nil { + corosyncExists = true + } + } + c.logger.Debug("Cluster detection: corosyncPath=%q exists=%v", corosyncPath, corosyncExists) + if c.hasCorosyncClusterConfig() { c.logger.Debug("Detected cluster via corosync configuration") return true, nil } - if c.hasMultiplePVENodes() { - c.logger.Debug("Detected cluster via nodes directory count") - return true, nil + nodeCount, nodeErr := c.pveNodesDirCount() + if nodeErr != nil { + c.logger.Debug("Cluster detection: nodes directory count unavailable: %v", nodeErr) + } else { + c.logger.Debug("Cluster detection: nodes directory count=%d", nodeCount) + if nodeCount > 1 { + if corosyncExists { + c.logger.Debug("Detected cluster via nodes directory count (corosync config present)") + return true, nil + } + c.logger.Debug("Cluster detection: nodes directory suggests cluster, but corosync config missing; deferring to service/pvecm checks") + } } - if c.isServiceActive(ctx, "corosync.service") { - c.logger.Debug("Detected cluster via corosync.service state") - return true, nil + corosyncActive := c.isServiceActive(ctx, "corosync.service") + c.logger.Debug("Cluster detection: corosync.service active=%v", corosyncActive) + if corosyncActive { + if corosyncExists { + c.logger.Debug("Detected cluster via corosync.service state (corosync config present)") + return true, nil + } + c.logger.Debug("Cluster detection: corosync.service active but corosync config missing; deferring to pvecm check") } // Fallback to pvecm status if _, err := c.depLookPath("pvecm"); err == nil { output, err := c.depRunCommand(ctx, "pvecm", "status") if err 
!= nil { + outText := strings.TrimSpace(string(output)) + if isPvecmMissingCorosyncConfig(outText) { + c.logger.Debug("Cluster detection: pvecm status indicates no corosync configuration; treating as standalone. Output: %s", summarizeCommandOutputText(outText)) + return false, nil + } return false, fmt.Errorf("pvecm status failed: %w", err) } clustered := strings.Contains(string(output), "Cluster information") @@ -1691,12 +1721,7 @@ func shortHostname(host string) string { } func (c *Collector) hasCorosyncClusterConfig() bool { - corosyncPath := c.config.CorosyncConfigPath - if corosyncPath == "" { - corosyncPath = filepath.Join(c.effectivePVEConfigPath(), "corosync.conf") - } else if !filepath.IsAbs(corosyncPath) { - corosyncPath = filepath.Join(c.effectivePVEConfigPath(), corosyncPath) - } + corosyncPath := c.effectiveCorosyncConfigPath() data, err := os.ReadFile(corosyncPath) if err != nil { return false @@ -1710,22 +1735,43 @@ func (c *Collector) hasCorosyncClusterConfig() bool { return false } +func (c *Collector) effectiveCorosyncConfigPath() string { + corosyncPath := strings.TrimSpace(c.config.CorosyncConfigPath) + if corosyncPath == "" { + return filepath.Join(c.effectivePVEConfigPath(), "corosync.conf") + } + if filepath.IsAbs(corosyncPath) { + return corosyncPath + } + return filepath.Join(c.effectivePVEConfigPath(), corosyncPath) +} + func (c *Collector) hasMultiplePVENodes() bool { + count, err := c.pveNodesDirCount() + return err == nil && count > 1 +} + +func (c *Collector) pveNodesDirCount() (int, error) { nodesDir := filepath.Join(c.effectivePVEConfigPath(), "nodes") entries, err := os.ReadDir(nodesDir) if err != nil { - return false + return 0, err } count := 0 for _, entry := range entries { if entry.IsDir() { count++ - if count > 1 { - return true - } } } - return false + return count, nil +} + +func isPvecmMissingCorosyncConfig(output string) bool { + if output == "" { + return false + } + lower := strings.ToLower(output) + return strings.Contains(lower, "corosync config") && strings.Contains(lower, "does not exist") } func (c *Collector) isServiceActive(ctx context.Context, service string) bool { diff --git a/internal/backup/collector_pve_test.go b/internal/backup/collector_pve_test.go index 4c3c38a..4d3ea8f 100644 --- a/internal/backup/collector_pve_test.go +++ b/internal/backup/collector_pve_test.go @@ -49,6 +49,44 @@ func TestIsClusteredPVEFallbackToPvecm(t *testing.T) { } } +func TestIsClusteredPVEMissingCorosyncConfigIsStandalone(t *testing.T) { + collector := newPVECollectorWithDeps(t, CollectorDeps{ + LookPath: func(cmd string) (string, error) { + switch cmd { + case "pvecm": + return "/usr/bin/pvecm", nil + case "systemctl": + return "", fmt.Errorf("missing systemctl") + default: + return "", fmt.Errorf("unexpected lookPath for %s", cmd) + } + }, + RunCommand: func(ctx context.Context, name string, args ...string) ([]byte, error) { + if name != "pvecm" { + return nil, fmt.Errorf("unexpected command %s", name) + } + if len(args) != 1 || args[0] != "status" { + return nil, fmt.Errorf("unexpected args %v", args) + } + return []byte("Error: Corosync config '/etc/pve/corosync.conf' does not exist - is this node part of a cluster?\n"), fmt.Errorf("exit status 2") + }, + }) + collector.config.CorosyncConfigPath = "" + + nodesDir := filepath.Join(collector.config.PVEConfigPath, "nodes", "single") + if err := os.MkdirAll(nodesDir, 0o755); err != nil { + t.Fatalf("failed to create nodes dir: %v", err) + } + + clustered, err := 
collector.isClusteredPVE(context.Background()) + if err != nil { + t.Fatalf("isClusteredPVE returned error: %v", err) + } + if clustered { + t.Fatal("expected clustered=false when pvecm reports missing corosync config") + } +} + func TestIsServiceActive(t *testing.T) { t.Run("missing systemctl", func(t *testing.T) { collector := newPVECollectorWithDeps(t, CollectorDeps{ @@ -140,6 +178,29 @@ func TestCephStorageConfigured(t *testing.T) { }) } +func TestCollectPVECommandsSkipsClusterRuntimeWhenBackupClusterConfigDisabled(t *testing.T) { + pvecmCalls := 0 + collector := newPVECollectorWithDeps(t, CollectorDeps{ + LookPath: func(cmd string) (string, error) { + return "/usr/bin/" + cmd, nil + }, + RunCommand: func(ctx context.Context, name string, args ...string) ([]byte, error) { + if name == "pvecm" { + pvecmCalls++ + } + return []byte{}, nil + }, + }) + collector.config.BackupClusterConfig = false + + if _, err := collector.collectPVECommands(context.Background(), true); err != nil { + t.Fatalf("collectPVECommands returned error: %v", err) + } + if pvecmCalls != 0 { + t.Fatalf("expected pvecm not to be called when BACKUP_CLUSTER_CONFIG=false, got %d calls", pvecmCalls) + } +} + func TestCephStatusAvailable(t *testing.T) { t.Run("cli missing", func(t *testing.T) { collector := newPVECollectorWithDeps(t, CollectorDeps{ diff --git a/internal/backup/collector_pve_util_test.go b/internal/backup/collector_pve_util_test.go index 3e0951f..759d2f7 100644 --- a/internal/backup/collector_pve_util_test.go +++ b/internal/backup/collector_pve_util_test.go @@ -684,6 +684,9 @@ func TestIsClusteredPVE(t *testing.T) { if err := os.MkdirAll(filepath.Join(nodesDir, "pve2"), 0o755); err != nil { t.Fatal(err) } + if err := os.WriteFile(filepath.Join(pveDir, "corosync.conf"), []byte("nodelist {}\n"), 0o644); err != nil { + t.Fatal(err) + } logger := newTestLogger() cfg := GetDefaultCollectorConfig() @@ -696,7 +699,33 @@ func TestIsClusteredPVE(t *testing.T) { t.Fatalf("isClusteredPVE error: %v", err) } if !clustered { - t.Error("isClusteredPVE() should return true with multiple nodes") + t.Error("isClusteredPVE() should return true when multiple nodes exist and corosync config is present") + } + }) + + t.Run("does not detect via multiple nodes alone", func(t *testing.T) { + tmpDir := t.TempDir() + pveDir := filepath.Join(tmpDir, "etc", "pve") + nodesDir := filepath.Join(pveDir, "nodes") + if err := os.MkdirAll(filepath.Join(nodesDir, "pve1"), 0o755); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(filepath.Join(nodesDir, "pve2"), 0o755); err != nil { + t.Fatal(err) + } + + logger := newTestLogger() + cfg := GetDefaultCollectorConfig() + cfg.PVEConfigPath = pveDir + cfg.CorosyncConfigPath = "" + collector := NewCollector(logger, cfg, tmpDir, "pve", false) + + clustered, err := collector.isClusteredPVE(context.Background()) + if err != nil { + t.Fatalf("isClusteredPVE error: %v", err) + } + if clustered { + t.Error("isClusteredPVE() should return false when only multiple node directories exist without corosync config") } }) diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index b7837a1..9ac80f7 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -146,8 +146,9 @@ CLOUD_ENABLED=false # true-false = enable/disable cloud backup CLOUD_REMOTE=GoogleDrive # rclone remote NAME (from "rclone config") CLOUD_REMOTE_PATH=/proxsave/backup # Optional subdirectory within the remote # Cloud log storage path: -# - New style: path only (remote 
name is taken from CLOUD_REMOTE): /proxsave/log -# - Legacy style: full remote:path: myremote:/proxsave/log +# - Recommended (same remote): path-only (remote name is taken from CLOUD_REMOTE), no trailing slash: proxsave/log +# - New style (also ok): /proxsave/log +# - Legacy / different remote: explicit remote:path: myremote:proxsave/log (or myremote:/proxsave/log) CLOUD_LOG_PATH=/proxsave/log CLOUD_UPLOAD_MODE=parallel # sequential | parallel (parallel uses a worker pool for associated/bundle files) CLOUD_PARALLEL_MAX_JOBS=2 # Parallel workers when CLOUD_UPLOAD_MODE=parallel @@ -268,6 +269,8 @@ METRICS_PATH=${BASE_DIR}/metrics # Collector options # ---------------------------------------------------------------------- # PVE +# BACKUP_CLUSTER_CONFIG controls both cluster config collection and cluster runtime commands +# (corosync.conf / pve-cluster data, pvecm status/nodes, and HA status collection). BACKUP_CLUSTER_CONFIG=true BACKUP_PVE_FIREWALL=true BACKUP_VZDUMP_CONFIG=true diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index c2e1113..3b3ad0a 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=2228394 +pid=2297463 host=pve -time=2026-01-21T16:03:32+01:00 +time=2026-01-21T17:57:13+01:00 From 8a674297cab271c98b121b34da72bdf85a5050c7 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 22 Jan 2026 15:50:57 +0100 Subject: [PATCH 09/10] Handle non-root and ignorable errors in datastore ownership Updated setDatastoreOwnership to skip chown/chmod operations when running as non-root and to treat EPERM, EACCES, and EROFS errors as non-fatal. This improves robustness in CI/test environments and on restricted filesystems by logging warnings and continuing execution. --- internal/orchestrator/.backup.lock | 4 ++-- internal/orchestrator/directory_recreation.go | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index 3b3ad0a..74e0e6e 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=2297463 +pid=2327386 host=pve -time=2026-01-21T17:57:13+01:00 +time=2026-01-21T18:36:49+01:00 diff --git a/internal/orchestrator/directory_recreation.go b/internal/orchestrator/directory_recreation.go index 12f4b53..19dc9f3 100644 --- a/internal/orchestrator/directory_recreation.go +++ b/internal/orchestrator/directory_recreation.go @@ -777,6 +777,13 @@ func isLikelyZFSMountPoint(path string, logger *logging.Logger) bool { // setDatastoreOwnership sets ownership to backup:backup for PBS datastores func setDatastoreOwnership(path string, logger *logging.Logger) error { + if os.Geteuid() != 0 { + // Ownership/permission adjustments are best-effort and should not block + // directory recreation when running without privileges (common in CI/tests). + logger.Debug("PBS datastore ownership: running as non-root (euid=%d); skipping chown/chmod for %s", os.Geteuid(), path) + return nil + } + backupUser, err := user.Lookup("backup") if err != nil { // On non-PBS systems the user may not exist; treat as non-fatal. 
@@ -794,6 +801,10 @@ func setDatastoreOwnership(path string, logger *logging.Logger) error { logger.Debug("PBS datastore ownership: chown %s to backup:backup (uid=%d gid=%d)", path, uid, gid) if err := os.Chown(path, uid, gid); err != nil { + if isIgnorableOwnershipError(err) { + logger.Warning("PBS datastore ownership: unable to chown %s to backup:backup (uid=%d gid=%d): %v (continuing)", path, uid, gid, err) + return nil + } return fmt.Errorf("chown %s: %w", path, err) } @@ -809,6 +820,10 @@ func setDatastoreOwnership(path string, logger *logging.Logger) error { if desired != current { logger.Debug("PBS datastore permissions: chmod %s from %o to %o", path, current, desired) if err := os.Chmod(path, desired); err != nil { + if isIgnorableOwnershipError(err) { + logger.Warning("PBS datastore permissions: unable to chmod %s from %o to %o: %v (continuing)", path, current, desired, err) + return nil + } return fmt.Errorf("chmod %s: %w", path, err) } } @@ -817,6 +832,13 @@ func setDatastoreOwnership(path string, logger *logging.Logger) error { return nil } +func isIgnorableOwnershipError(err error) bool { + // Common "can't chown/chmod here" situations: + // - EPERM/EACCES: not permitted (non-root, user namespace restrictions, etc.) + // - EROFS: read-only filesystem + return errors.Is(err, syscall.EPERM) || errors.Is(err, syscall.EACCES) || errors.Is(err, syscall.EROFS) +} + // RecreateDirectoriesFromConfig recreates storage/datastore directories based on system type func RecreateDirectoriesFromConfig(systemType SystemType, logger *logging.Logger) error { logger.Info("Recreating directory structures from configuration...") From fafdf5d71d8a726fed0f6336fd90b4c8e13cbf8d Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 22 Jan 2026 16:55:19 +0100 Subject: [PATCH 10/10] Improve rclone cloud scan timeout handling and docs Cloud restore/decrypt workflows now use RCLONE_TIMEOUT_CONNECTION to cap scan duration, with improved error messages and debug logging for timeouts. Documentation and config templates have been updated to clarify the timeout's role in cloud scanning, and troubleshooting steps for slow remotes or large directories are provided. --- docs/CLOUD_STORAGE.md | 4 ++- docs/CONFIGURATION.md | 4 +-- docs/RESTORE_GUIDE.md | 1 + docs/TROUBLESHOOTING.md | 22 +++++++++++++++++ internal/config/templates/backup.env | 1 + internal/orchestrator/.backup.lock | 4 +-- internal/orchestrator/backup_sources.go | 7 ++++-- internal/orchestrator/decrypt.go | 33 ++++++++++++++++++++++--- internal/orchestrator/decrypt_tui.go | 18 ++++++++++++-- internal/orchestrator/restore_tui.go | 17 +++++++++++-- 10 files changed, 97 insertions(+), 14 deletions(-) diff --git a/docs/CLOUD_STORAGE.md b/docs/CLOUD_STORAGE.md index d68590d..7707fc8 100644 --- a/docs/CLOUD_STORAGE.md +++ b/docs/CLOUD_STORAGE.md @@ -383,7 +383,7 @@ RETENTION_YEARLY=3 | `CLOUD_PARALLEL_MAX_JOBS` | `2` | Max concurrent uploads (parallel mode) | | `CLOUD_PARALLEL_VERIFICATION` | `true` | Verify checksums after upload | | `CLOUD_WRITE_HEALTHCHECK` | `false` | Use write test for connectivity check | -| `RCLONE_TIMEOUT_CONNECTION` | `30` | Connection timeout (seconds) | +| `RCLONE_TIMEOUT_CONNECTION` | `30` | Connection timeout (seconds). Also used by restore/decrypt when scanning cloud backups (list + manifest read). 
| | `RCLONE_TIMEOUT_OPERATION` | `300` | Operation timeout (seconds) | | `RCLONE_BANDWIDTH_LIMIT` | _(empty)_ | Upload rate limit (e.g., `5M` = 5 MB/s) | | `RCLONE_TRANSFERS` | `4` | Number of parallel transfers | @@ -437,6 +437,7 @@ path inside the remote, and uses that consistently for: - **uploads** (cloud backend); - **cloud retention**; - **restore / decrypt menus** (entry “Cloud backups (rclone)”). + - Restore/decrypt cloud scanning is protected by `RCLONE_TIMEOUT_CONNECTION` (increase it on slow remotes or very large directories). You can choose the style you prefer; they are equivalent from the tool’s point of view. @@ -616,6 +617,7 @@ rm /tmp/test*.txt | `couldn't find configuration section 'gdrive'` | Remote not configured | `rclone config` → create remote | | `401 unauthorized` | Credentials expired | `rclone config reconnect gdrive` or regenerate keys | | `connection timeout (30s)` | Slow network | Increase `RCLONE_TIMEOUT_CONNECTION=60` | +| `Timed out while scanning ... (rclone)` | Slow remote / huge directory | Increase `RCLONE_TIMEOUT_CONNECTION` and re-try restore/decrypt scan | | `operation timeout (300s exceeded)` | Large file + slow network | Increase `RCLONE_TIMEOUT_OPERATION=900` | | `429 Too Many Requests` | API rate limiting | Reduce `RCLONE_TRANSFERS=2`, increase `CLOUD_BATCH_PAUSE=3` | | `directory not found` | Path doesn't exist | `rclone mkdir gdrive:pbs-backups` | diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 058574c..f17a209 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -548,7 +548,7 @@ Quick comparison to help you choose the right storage configuration: ```bash # Connection timeout (seconds) -RCLONE_TIMEOUT_CONNECTION=30 # Remote accessibility check +RCLONE_TIMEOUT_CONNECTION=30 # Remote accessibility check (also used for restore/decrypt cloud scan) # Operation timeout (seconds) RCLONE_TIMEOUT_OPERATION=300 # Upload/download operations (5 minutes default) @@ -571,7 +571,7 @@ RCLONE_FLAGS="--checkers=4 --stats=0 --drive-use-trash=false --drive-pacer-min-s ### Timeout Tuning -- **CONNECTION**: Short timeout for quick accessibility check (default 30s) +- **CONNECTION**: Short timeout for quick accessibility check (default 30s); also caps restore/decrypt cloud scanning (listing backups + reading manifests) - **OPERATION**: Long timeout for large file uploads (increase for slow networks) ### Bandwidth Limit Format diff --git a/docs/RESTORE_GUIDE.md b/docs/RESTORE_GUIDE.md index 3713273..cffac04 100644 --- a/docs/RESTORE_GUIDE.md +++ b/docs/RESTORE_GUIDE.md @@ -2571,6 +2571,7 @@ A: Yes, in two ways: - lists `.bundle.tar` bundles on the remote with `rclone lsf`; - reads metadata/manifest via `rclone cat` (without downloading everything); - when you pick a backup, downloads it to `/tmp/proxsave` and proceeds with decrypt/restore. + - If scanning times out (slow remote / huge directory), increase `RCLONE_TIMEOUT_CONNECTION` and retry. 2. **From a local rclone mount (restore-only)** If you prefer to mount the rclone backend as a local filesystem: diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 09ade05..67905f2 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -277,6 +277,28 @@ iptables -L -n | grep -i drop --- +#### Restore/Decrypt: stuck on “Scanning backup path…” or timeout (cloud/rclone) + +**Cause**: The tool scans cloud backups by listing the remote (`rclone lsf`) and reading each backup manifest (`rclone cat`). On slow remotes or very large directories this can time out. 
+
+**Solution**:
+```bash
+# Increase scan timeout
+nano configs/backup.env
+RCLONE_TIMEOUT_CONNECTION=120
+
+# Re-run restore with debug logs (restore log path is printed on start)
+./build/proxsave --restore --log-level debug
+```
+
+If it still fails, run the equivalent manual checks (replace `<remote:path>` with your remote and base path):
+```bash
+rclone lsf <remote:path>
+rclone cat <remote:path>/<backup>.bundle.tar | head
+```
+
+---
+
 #### Error: `operation timeout (300s exceeded)`
 
 **Cause**: Large backup file + slow upload speed.
diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env
index 9ac80f7..17cef09 100644
--- a/internal/config/templates/backup.env
+++ b/internal/config/templates/backup.env
@@ -160,6 +160,7 @@ CLOUD_WRITE_HEALTHCHECK=false     # false = auto (list + fallback to write te
 # ----------------------------------------------------------------------
 # CONNECTION timeout: remote accessibility check (short)
 # OPERATION timeout: full upload/download operations (long)
+# NOTE: The connection timeout is also used by restore/decrypt workflows when scanning cloud backups.
 RCLONE_TIMEOUT_CONNECTION=30            # seconds
 RCLONE_TIMEOUT_OPERATION=300            # seconds
 RCLONE_BANDWIDTH_LIMIT="10M"            # e.g. "10M" for 10 MB/s, empty = unlimited
diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock
index 74e0e6e..2a493a8 100644
--- a/internal/orchestrator/.backup.lock
+++ b/internal/orchestrator/.backup.lock
@@ -1,3 +1,3 @@
-pid=2327386
+pid=2367035
 host=pve
-time=2026-01-21T18:36:49+01:00
+time=2026-01-21T19:42:55+01:00
diff --git a/internal/orchestrator/backup_sources.go b/internal/orchestrator/backup_sources.go
index a4669de..8642ae3 100644
--- a/internal/orchestrator/backup_sources.go
+++ b/internal/orchestrator/backup_sources.go
@@ -87,6 +87,7 @@ func buildDecryptPathOptions(cfg *config.Config, logger *logging.Logger) (option
 func discoverRcloneBackups(ctx context.Context, remotePath string, logger *logging.Logger) (candidates []*decryptCandidate, err error) {
 	done := logging.DebugStart(logger, "discover rclone backups", "remote=%s", remotePath)
 	defer func() { done(err) }()
+	start := time.Now()
 	// Build full remote path - ensure it ends with ":" if it's just a remote name
 	fullPath := strings.TrimSpace(remotePath)
 	if !strings.Contains(fullPath, ":") {
@@ -100,11 +101,12 @@ func discoverRcloneBackups(ctx context.Context, remotePath string, logger *loggi
 
 	// Use rclone lsf to list files inside the backup directory
 	cmd := exec.CommandContext(ctx, "rclone", "lsf", fullPath)
+	lsfStart := time.Now()
 	output, err := cmd.CombinedOutput()
 	if err != nil {
 		return nil, fmt.Errorf("failed to list rclone remote %s: %w (output: %s)", fullPath, err, string(output))
 	}
-	logging.DebugStep(logger, "discover rclone backups", "rclone lsf output bytes=%d", len(output))
+	logging.DebugStep(logger, "discover rclone backups", "rclone lsf output bytes=%d elapsed=%s", len(output), time.Since(lsfStart))
 
 	candidates = make([]*decryptCandidate, 0)
 	lines := strings.Split(string(output), "\n")
@@ -227,12 +229,13 @@ func discoverRcloneBackups(ctx context.Context, remotePath string, logger *loggi
 	logging.DebugStep(
 		logger,
 		"discover rclone backups",
-		"summary entries=%d empty=%d non_candidate=%d manifest_errors=%d accepted=%d",
+		"summary entries=%d empty=%d non_candidate=%d manifest_errors=%d accepted=%d elapsed=%s",
 		totalEntries,
 		emptyEntries,
 		nonCandidateEntries,
 		manifestErrors,
 		len(candidates),
+		time.Since(start),
 	)
 	logDebug(logger, "Cloud (rclone): scanned %d entries, found %d valid backup candidate(s)", len(lines), len(candidates))
 	logDebug(logger, 
"Cloud (rclone): discovered %d bundle candidate(s) in %s", len(candidates), fullPath) diff --git a/internal/orchestrator/decrypt.go b/internal/orchestrator/decrypt.go index a1614da..fa4fbdb 100644 --- a/internal/orchestrator/decrypt.go +++ b/internal/orchestrator/decrypt.go @@ -355,19 +355,32 @@ func inspectBundleManifest(bundlePath string) (*backup.Manifest, error) { func inspectRcloneBundleManifest(ctx context.Context, remotePath string, logger *logging.Logger) (manifest *backup.Manifest, err error) { done := logging.DebugStart(logger, "inspect rclone bundle manifest", "remote=%s", remotePath) defer func() { done(err) }() + start := time.Now() + + // Use a child context so we can stop rclone once the manifest is found. + // This avoids a deadlock when the manifest is near the beginning of the tar: + // if we stop reading stdout early and still Wait(), rclone can block writing + // the remaining bytes into a full pipe. + cmdCtx, cancel := context.WithCancel(ctx) + defer cancel() + logging.DebugStep(logger, "inspect rclone bundle manifest", "executing: rclone cat %s", remotePath) - cmd := exec.CommandContext(ctx, "rclone", "cat", remotePath) + cmd := exec.CommandContext(cmdCtx, "rclone", "cat", remotePath) stdout, err := cmd.StdoutPipe() if err != nil { return nil, fmt.Errorf("open rclone stream: %w", err) } - cmd.Stderr = os.Stderr + defer stdout.Close() + + var stderr bytes.Buffer + cmd.Stderr = &stderr if err := cmd.Start(); err != nil { return nil, fmt.Errorf("start rclone cat: %w", err) } tr := tar.NewReader(stdout) + cancelledEarly := false for { hdr, err := tr.Next() @@ -394,6 +407,8 @@ func inspectRcloneBundleManifest(ctx context.Context, remotePath string, logger } manifest = &m logging.DebugStep(logger, "inspect rclone bundle manifest", "manifest entry=%s bytes=%d", hdr.Name, len(data)) + cancelledEarly = true + cancel() break } } @@ -402,13 +417,25 @@ func inspectRcloneBundleManifest(ctx context.Context, remotePath string, logger // because stopping early may cause rclone to see a broken pipe. waitErr := cmd.Wait() if manifest == nil { + stderrMsg := strings.TrimSpace(stderr.String()) if waitErr != nil { + if stderrMsg != "" { + return nil, fmt.Errorf("manifest not found inside remote bundle (rclone exited with error): %w (stderr: %s)", waitErr, stderrMsg) + } return nil, fmt.Errorf("manifest not found inside remote bundle (rclone exited with error): %w", waitErr) } + if stderrMsg != "" { + return nil, fmt.Errorf("manifest not found inside remote bundle %s (stderr: %s)", filepath.Base(remotePath), stderrMsg) + } return nil, fmt.Errorf("manifest not found inside remote bundle %s", filepath.Base(remotePath)) } if waitErr != nil { - logger.Debug("rclone cat %s completed with non-zero status after manifest read: %v", remotePath, waitErr) + // If we cancelled early, rclone is expected to exit non-zero (broken pipe / killed). 
diff --git a/internal/orchestrator/decrypt_tui.go b/internal/orchestrator/decrypt_tui.go
index ef064f2..a45af1a 100644
--- a/internal/orchestrator/decrypt_tui.go
+++ b/internal/orchestrator/decrypt_tui.go
@@ -7,6 +7,8 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"sync/atomic"
+	"time"
 
 	"filippo.io/age"
 	"github.com/gdamore/tcell/v2"
@@ -154,6 +156,7 @@ func runDecryptSelectionWizard(ctx context.Context, cfg *config.Config, logger *
 	selection = &decryptSelection{}
 	var selectionErr error
 	var scan scanController
+	var scanSeq uint64
 
 	pathList := tview.NewList().ShowSecondaryText(false)
 	pathList.SetMainTextColor(tcell.ColorWhite).
@@ -171,6 +174,7 @@ func runDecryptSelectionWizard(ctx context.Context, cfg *config.Config, logger *
 			return
 		}
 		selectedOption := options[index]
+		scanID := atomic.AddUint64(&scanSeq, 1)
 		logging.DebugStep(logger, "decrypt selection wizard", "selected source label=%q path=%q rclone=%v", selectedOption.Label, selectedOption.Path, selectedOption.IsRclone)
 		pages.SwitchToPage("loading")
 		go func() {
@@ -179,11 +183,18 @@ func runDecryptSelectionWizard(ctx context.Context, cfg *config.Config, logger *
 
 			var candidates []*decryptCandidate
 			var scanErr error
-			scanDone := logging.DebugStart(logger, "scan backup source", "path=%s rclone=%v", selectedOption.Path, selectedOption.IsRclone)
+			scanDone := logging.DebugStart(logger, "scan backup source", "id=%d path=%s rclone=%v", scanID, selectedOption.Path, selectedOption.IsRclone)
 			defer func() { scanDone(scanErr) }()
 			if selectedOption.IsRclone {
-				candidates, scanErr = discoverRcloneBackups(scanCtx, selectedOption.Path, logger)
+				timeout := 30 * time.Second
+				if cfg != nil && cfg.RcloneTimeoutConnection > 0 {
+					timeout = time.Duration(cfg.RcloneTimeoutConnection) * time.Second
+				}
+				logging.DebugStep(logger, "scan backup source", "id=%d rclone_timeout=%s", scanID, timeout)
+				rcloneCtx, cancel := context.WithTimeout(scanCtx, timeout)
+				defer cancel()
+				candidates, scanErr = discoverRcloneBackups(rcloneCtx, selectedOption.Path, logger)
 			} else {
 				candidates, scanErr = discoverBackupCandidates(logger, selectedOption.Path)
 			}
@@ -195,6 +206,9 @@ func runDecryptSelectionWizard(ctx context.Context, cfg *config.Config, logger *
 		app.QueueUpdateDraw(func() {
 			if scanErr != nil {
 				message := fmt.Sprintf("Failed to inspect %s: %v", selectedOption.Path, scanErr)
+				if selectedOption.IsRclone && errors.Is(scanErr, context.DeadlineExceeded) {
+					message = fmt.Sprintf("Timed out while scanning %s (rclone). Check connectivity/rclone config or increase RCLONE_TIMEOUT_CONNECTION. (%v)", selectedOption.Path, scanErr)
+				}
 				showErrorModal(app, pages, configPath, buildSig, message, func() {
 					pages.SwitchToPage("paths")
 				})
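
Both selection wizards now derive the scan deadline from `RCLONE_TIMEOUT_CONNECTION` (falling back to 30s) and turn `context.DeadlineExceeded` into an actionable hint. The same logic in isolation; `scanWithTimeout` and `slowScan` are stand-ins for the wizard goroutine and `discoverRcloneBackups`, not names from the patch:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// scanWithTimeout bounds a scan by the configured connection timeout
// (falling back to 30s) and maps context.DeadlineExceeded to a hint.
func scanWithTimeout(parent context.Context, timeoutSec int, slowScan func(context.Context) error) error {
	timeout := 30 * time.Second
	if timeoutSec > 0 {
		timeout = time.Duration(timeoutSec) * time.Second
	}
	ctx, cancel := context.WithTimeout(parent, timeout)
	defer cancel()

	if err := slowScan(ctx); errors.Is(err, context.DeadlineExceeded) {
		return fmt.Errorf("timed out after %s; check connectivity or increase RCLONE_TIMEOUT_CONNECTION: %w", timeout, err)
	} else if err != nil {
		return err
	}
	return nil
}

func main() {
	// Simulate a scan that takes longer than a 1-second budget.
	err := scanWithTimeout(context.Background(), 1, func(ctx context.Context) error {
		select {
		case <-time.After(5 * time.Second):
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	})
	fmt.Println(err)
}
```
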
(%v)", selectedOption.Path, scanErr) + } showErrorModal(app, pages, configPath, buildSig, message, func() { pages.SwitchToPage("paths") }) diff --git a/internal/orchestrator/restore_tui.go b/internal/orchestrator/restore_tui.go index e7cfd8b..898e812 100644 --- a/internal/orchestrator/restore_tui.go +++ b/internal/orchestrator/restore_tui.go @@ -9,6 +9,7 @@ import ( "path/filepath" "sort" "strings" + "sync/atomic" "time" "github.com/gdamore/tcell/v2" @@ -545,6 +546,7 @@ func runRestoreSelectionWizard(ctx context.Context, cfg *config.Config, logger * selection := &restoreSelection{} var selectionErr error var scan scanController + var scanSeq uint64 pathList := tview.NewList().ShowSecondaryText(false) pathList.SetMainTextColor(tcell.ColorWhite). @@ -562,6 +564,7 @@ func runRestoreSelectionWizard(ctx context.Context, cfg *config.Config, logger * return } selectedOption := options[index] + scanID := atomic.AddUint64(&scanSeq, 1) logging.DebugStep(logger, "restore selection wizard", "selected source label=%q path=%q rclone=%v", selectedOption.Label, selectedOption.Path, selectedOption.IsRclone) pages.SwitchToPage("paths-loading") go func() { @@ -570,11 +573,18 @@ func runRestoreSelectionWizard(ctx context.Context, cfg *config.Config, logger * var candidates []*decryptCandidate var scanErr error - scanDone := logging.DebugStart(logger, "scan backup source", "path=%s rclone=%v", selectedOption.Path, selectedOption.IsRclone) + scanDone := logging.DebugStart(logger, "scan backup source", "id=%d path=%s rclone=%v", scanID, selectedOption.Path, selectedOption.IsRclone) defer func() { scanDone(scanErr) }() if selectedOption.IsRclone { - candidates, scanErr = discoverRcloneBackups(scanCtx, selectedOption.Path, logger) + timeout := 30 * time.Second + if cfg != nil && cfg.RcloneTimeoutConnection > 0 { + timeout = time.Duration(cfg.RcloneTimeoutConnection) * time.Second + } + logging.DebugStep(logger, "scan backup source", "id=%d rclone_timeout=%s", scanID, timeout) + rcloneCtx, cancel := context.WithTimeout(scanCtx, timeout) + defer cancel() + candidates, scanErr = discoverRcloneBackups(rcloneCtx, selectedOption.Path, logger) } else { candidates, scanErr = discoverBackupCandidates(logger, selectedOption.Path) } @@ -586,6 +596,9 @@ func runRestoreSelectionWizard(ctx context.Context, cfg *config.Config, logger * app.QueueUpdateDraw(func() { if scanErr != nil { message := fmt.Sprintf("Failed to inspect %s: %v", selectedOption.Path, scanErr) + if selectedOption.IsRclone && errors.Is(scanErr, context.DeadlineExceeded) { + message = fmt.Sprintf("Timed out while scanning %s (rclone). Check connectivity/rclone config or increase RCLONE_TIMEOUT_CONNECTION. (%v)", selectedOption.Path, scanErr) + } showRestoreErrorModal(app, pages, configPath, buildSig, message, func() { pages.SwitchToPage("paths") })