From 686acbfc801cb1032e3af7bbbe7319184c9cad38 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Wed, 4 Feb 2026 22:17:54 +0100 Subject: [PATCH 1/9] Clean up parse comment and fill UpgradeResult Remove duplicated comment lines above Config.parse to avoid redundant documentation. In computeConfigUpgrade, ensure UpgradeResult is fully populated (MissingKeys, ExtraKeys, CaseConflictKeys, PreservedValues, Warnings) when no content changes are detected so callers receive complete upgrade metadata. --- internal/config/config.go | 2 -- internal/config/upgrade.go | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 9229672..91ecfbf 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -326,8 +326,6 @@ func (c *Config) loadEnvOverrides() { } } -// parse interprets raw configuration values. -// It supports both legacy and new backup.env formats. // parse interprets raw configuration values. // It supports both legacy and new backup.env formats. func (c *Config) parse() error { diff --git a/internal/config/upgrade.go b/internal/config/upgrade.go index 71d54ee..5db19e7 100644 --- a/internal/config/upgrade.go +++ b/internal/config/upgrade.go @@ -394,9 +394,11 @@ func computeConfigUpgrade(configPath string) (*UpgradeResult, string, []byte, er newContent := strings.Join(newLines, lineEnding) if newContent == string(originalContent) { result.Changed = false - result.Warnings = warnings + result.MissingKeys = missingKeys result.ExtraKeys = extraKeys + result.CaseConflictKeys = caseConflictKeys result.PreservedValues = preserved + result.Warnings = warnings return result, "", originalContent, nil } From 90b3790c49ee151a34fc932f0f2cf4d0f1c2a866 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 5 Feb 2026 12:04:33 +0100 Subject: [PATCH 2/9] logging: capture issues and add counters Add an issueLines slice to Logger and capture WARNING/ERROR/CRITICAL log entries in logWithLabel as single-line entries (sanitizing CR/LF) for end-of-run summaries. Introduce thread-safe accessors WarningCount, ErrorCount and IssueLines to retrieve counts and the captured issue lines. Import strings to support sanitization. Also includes an updated .backup.lock metadata change (pid/time). --- internal/logging/logger.go | 38 ++++++++++++++++++++++++++++++ internal/orchestrator/.backup.lock | 4 ++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/internal/logging/logger.go b/internal/logging/logger.go index 5f000e2..ebb8051 100644 --- a/internal/logging/logger.go +++ b/internal/logging/logger.go @@ -4,6 +4,7 @@ import ( "fmt" "io" "os" + "strings" "sync" "time" @@ -20,6 +21,7 @@ type Logger struct { logFile *os.File // Log file (optional) warningCount int64 errorCount int64 + issueLines []string // Captured WARNING/ERROR/CRITICAL lines for end-of-run summary exitFunc func(int) } @@ -185,6 +187,15 @@ func (l *Logger) logWithLabel(level types.LogLevel, label string, colorOverride message, ) + // Capture warnings/errors for final summary output (single-line). + switch level { + case types.LogLevelWarning, types.LogLevelError, types.LogLevelCritical: + issue := fmt.Sprintf("[%s] %-8s %s", timestamp, levelStr, message) + issue = strings.ReplaceAll(issue, "\r", " ") + issue = strings.ReplaceAll(issue, "\n", " ") + l.issueLines = append(l.issueLines, issue) + } + // Write to stdout with colors. 
fmt.Fprint(l.output, outputStdout) @@ -208,6 +219,33 @@ func (l *Logger) HasErrors() bool { return l.errorCount > 0 } +// WarningCount returns the total number of WARNING log entries emitted. +func (l *Logger) WarningCount() int64 { + l.mu.Lock() + defer l.mu.Unlock() + return l.warningCount +} + +// ErrorCount returns the total number of ERROR/CRITICAL log entries emitted. +func (l *Logger) ErrorCount() int64 { + l.mu.Lock() + defer l.mu.Unlock() + return l.errorCount +} + +// IssueLines returns a copy of captured WARNING/ERROR/CRITICAL log lines in +// chronological order. Intended for end-of-run summaries. +func (l *Logger) IssueLines() []string { + l.mu.Lock() + defer l.mu.Unlock() + if len(l.issueLines) == 0 { + return nil + } + out := make([]string, len(l.issueLines)) + copy(out, l.issueLines) + return out +} + // Debug writes a debug log. func (l *Logger) Debug(format string, args ...interface{}) { l.log(types.LogLevelDebug, format, args...) diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index fffdac2..1a4fceb 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=99930 +pid=21807 host=pve -time=2026-02-03T19:42:03+01:00 +time=2026-02-05T11:58:08+01:00 From 9362d9d228c3334175082b37070b8993af606fdf Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 5 Feb 2026 13:21:40 +0100 Subject: [PATCH 3/9] Add granular PBS collection toggles & log summary Introduce fine-grained PBS collection flags (S3 endpoints, node config, ACME accounts/plugins, metric servers, traffic control, PBS network config) across config, collector, orchestrator and env templates so subcomponents can be independently enabled/disabled. Update collector logic to respect these flags (exclude files, skip commands and snapshots when disabled) and extend collectPBSConfigFile to accept a disable-hint used in clearer log messages. Add a final-run summary that prints WARNING/ERROR log lines to help surface root causes. Update docs, tests and templates to reflect the new options. --- cmd/proxsave/main.go | 19 ++- docs/BACKUP_ENV_MAPPING.md | 7 + internal/backup/collector.go | 52 +++--- internal/backup/collector_pbs.go | 166 ++++++++++++-------- internal/backup/collector_pbs_extra_test.go | 3 +- internal/config/config.go | 15 ++ internal/config/templates/backup.env | 7 + internal/orchestrator/.backup.lock | 4 +- internal/orchestrator/orchestrator.go | 7 + 9 files changed, 192 insertions(+), 88 deletions(-) diff --git a/cmd/proxsave/main.go b/cmd/proxsave/main.go index 5f5aa19..7241cd8 100644 --- a/cmd/proxsave/main.go +++ b/cmd/proxsave/main.go @@ -1581,6 +1581,24 @@ func printNetworkRollbackCountdown(abortInfo *orchestrator.RestoreAbortInfo) { func printFinalSummary(finalExitCode int) { fmt.Println() + // Print a flat list of all WARNING/ERROR/CRITICAL log entries that occurred during the run. + // This makes it easy to spot the root cause(s) behind a WARNING/ERROR exit status without + // scrolling through the full log. 
+ logger := logging.GetDefaultLogger() + if logger != nil { + issues := logger.IssueLines() + if len(issues) > 0 { + fmt.Println("===========================================") + fmt.Printf("WARNINGS/ERRORS DURING RUN (warnings=%d errors=%d)\n", logger.WarningCount(), logger.ErrorCount()) + fmt.Println() + for _, line := range issues { + fmt.Println(line) + } + fmt.Println("===========================================") + fmt.Println() + } + } + summarySig := buildSignature() if summarySig == "" { summarySig = "unknown" @@ -1588,7 +1606,6 @@ func printFinalSummary(finalExitCode int) { colorReset := "\033[0m" color := "" - logger := logging.GetDefaultLogger() hasWarnings := logger != nil && logger.HasWarnings() switch { diff --git a/docs/BACKUP_ENV_MAPPING.md b/docs/BACKUP_ENV_MAPPING.md index 798bdcb..9d1057e 100644 --- a/docs/BACKUP_ENV_MAPPING.md +++ b/docs/BACKUP_ENV_MAPPING.md @@ -88,6 +88,13 @@ WEBHOOK_TIMEOUT = SAME ## Go-only variables (new) SYSTEM_ROOT_PREFIX = NEW (Go-only) → Override system root for collection (testing/chroot). Empty or "/" uses the real root. +BACKUP_PBS_S3_ENDPOINTS = NEW (Go-only) → Collect `s3.cfg` and S3 endpoint snapshots (PBS). +BACKUP_PBS_NODE_CONFIG = NEW (Go-only) → Collect `node.cfg` and node snapshots (PBS). +BACKUP_PBS_ACME_ACCOUNTS = NEW (Go-only) → Collect `acme/accounts.cfg` and ACME account snapshots (PBS). +BACKUP_PBS_ACME_PLUGINS = NEW (Go-only) → Collect `acme/plugins.cfg` and ACME plugin snapshots (PBS). +BACKUP_PBS_METRIC_SERVERS = NEW (Go-only) → Collect `metricserver.cfg` (PBS). +BACKUP_PBS_TRAFFIC_CONTROL = NEW (Go-only) → Collect `traffic-control.cfg` and traffic-control snapshots (PBS). +BACKUP_PBS_NETWORK_CONFIG = NEW (Go-only) → Collect `network.cfg` and network snapshots (PBS), independent from BACKUP_NETWORK_CONFIGS (system). 
## Renamed variables / Supported aliases in Go diff --git a/internal/backup/collector.go b/internal/backup/collector.go index ea6a4c3..8fb0d5a 100644 --- a/internal/backup/collector.go +++ b/internal/backup/collector.go @@ -155,14 +155,21 @@ type CollectorConfig struct { CephConfigPath string // PBS-specific collection options - BackupDatastoreConfigs bool - BackupUserConfigs bool - BackupRemoteConfigs bool - BackupSyncJobs bool - BackupVerificationJobs bool - BackupTapeConfigs bool - BackupPruneSchedules bool - BackupPxarFiles bool + BackupDatastoreConfigs bool + BackupPBSS3Endpoints bool + BackupPBSNodeConfig bool + BackupPBSAcmeAccounts bool + BackupPBSAcmePlugins bool + BackupPBSMetricServers bool + BackupPBSTrafficControl bool + BackupUserConfigs bool + BackupRemoteConfigs bool + BackupSyncJobs bool + BackupVerificationJobs bool + BackupTapeConfigs bool + BackupPBSNetworkConfig bool + BackupPruneSchedules bool + BackupPxarFiles bool // System collection options BackupNetworkConfigs bool @@ -242,9 +249,11 @@ func (c *CollectorConfig) Validate() error { c.BackupPVEFirewall || c.BackupVZDumpConfig || c.BackupPVEACL || c.BackupPVEJobs || c.BackupPVESchedules || c.BackupPVEReplication || c.BackupPVEBackupFiles || c.BackupCephConfig || - c.BackupDatastoreConfigs || c.BackupUserConfigs || c.BackupRemoteConfigs || + c.BackupDatastoreConfigs || c.BackupPBSS3Endpoints || c.BackupPBSNodeConfig || + c.BackupPBSAcmeAccounts || c.BackupPBSAcmePlugins || c.BackupPBSMetricServers || + c.BackupPBSTrafficControl || c.BackupUserConfigs || c.BackupRemoteConfigs || c.BackupSyncJobs || c.BackupVerificationJobs || c.BackupTapeConfigs || - c.BackupPruneSchedules || c.BackupPxarFiles || + c.BackupPBSNetworkConfig || c.BackupPruneSchedules || c.BackupPxarFiles || c.BackupNetworkConfigs || c.BackupAptSources || c.BackupCronJobs || c.BackupSystemdServices || c.BackupSSLCerts || c.BackupSysctlConfig || c.BackupKernelModules || c.BackupFirewallRules || @@ -322,14 +331,21 @@ func GetDefaultCollectorConfig() *CollectorConfig { CephConfigPath: "/etc/ceph", // PBS-specific (all enabled by default) - BackupDatastoreConfigs: true, - BackupUserConfigs: true, - BackupRemoteConfigs: true, - BackupSyncJobs: true, - BackupVerificationJobs: true, - BackupTapeConfigs: true, - BackupPruneSchedules: true, - BackupPxarFiles: true, + BackupDatastoreConfigs: true, + BackupPBSS3Endpoints: true, + BackupPBSNodeConfig: true, + BackupPBSAcmeAccounts: true, + BackupPBSAcmePlugins: true, + BackupPBSMetricServers: true, + BackupPBSTrafficControl: true, + BackupUserConfigs: true, + BackupRemoteConfigs: true, + BackupSyncJobs: true, + BackupVerificationJobs: true, + BackupTapeConfigs: true, + BackupPBSNetworkConfig: true, + BackupPruneSchedules: true, + BackupPxarFiles: true, // System collection (all enabled by default) BackupNetworkConfigs: true, diff --git a/internal/backup/collector_pbs.go b/internal/backup/collector_pbs.go index c419a7b..6759b00 100644 --- a/internal/backup/collector_pbs.go +++ b/internal/backup/collector_pbs.go @@ -18,10 +18,14 @@ func (c *Collector) pbsConfigPath() string { } // collectPBSConfigFile collects a single PBS configuration file with detailed logging -func (c *Collector) collectPBSConfigFile(ctx context.Context, root, filename, description string, enabled bool) ManifestEntry { +func (c *Collector) collectPBSConfigFile(ctx context.Context, root, filename, description string, enabled bool, disableHint string) ManifestEntry { if !enabled { c.logger.Debug("Skipping %s: disabled by configuration", filename) - 
c.logger.Info(" %s: disabled", description) + if strings.TrimSpace(disableHint) != "" { + c.logger.Info(" %s: disabled (%s=false)", description, disableHint) + } else { + c.logger.Info(" %s: disabled", description) + } return ManifestEntry{Status: StatusDisabled} } @@ -41,7 +45,11 @@ func (c *Collector) collectPBSConfigFile(ctx context.Context, root, filename, de if os.IsNotExist(err) { c.incFilesNotFound() c.logger.Debug(" File not found: %v", err) - c.logger.Info(" %s: not configured", description) + if strings.TrimSpace(disableHint) != "" { + c.logger.Warning(" %s: not configured. If unused, set %s=false to disable.", description, disableHint) + } else { + c.logger.Warning(" %s: not configured", description) + } return ManifestEntry{Status: StatusNotFound} } if err != nil { @@ -188,6 +196,24 @@ func (c *Collector) collectPBSDirectories(ctx context.Context, root string) erro if !c.config.BackupDatastoreConfigs { extraExclude = append(extraExclude, "datastore.cfg") } + if !c.config.BackupDatastoreConfigs || !c.config.BackupPBSS3Endpoints { + extraExclude = append(extraExclude, "s3.cfg") + } + if !c.config.BackupPBSNodeConfig { + extraExclude = append(extraExclude, "node.cfg") + } + if !c.config.BackupPBSAcmeAccounts { + extraExclude = append(extraExclude, "**/acme/accounts.cfg") + } + if !c.config.BackupPBSAcmePlugins { + extraExclude = append(extraExclude, "**/acme/plugins.cfg") + } + if !c.config.BackupPBSMetricServers { + extraExclude = append(extraExclude, "metricserver.cfg") + } + if !c.config.BackupPBSTrafficControl { + extraExclude = append(extraExclude, "traffic-control.cfg") + } if !c.config.BackupUserConfigs { // User-related configs are intentionally excluded together. extraExclude = append(extraExclude, "user.cfg", "acl.cfg", "domains.cfg") @@ -198,15 +224,15 @@ func (c *Collector) collectPBSDirectories(ctx context.Context, root string) erro if !c.config.BackupSyncJobs { extraExclude = append(extraExclude, "sync.cfg") } - if !c.config.BackupVerificationJobs { - extraExclude = append(extraExclude, "verification.cfg") - } - if !c.config.BackupTapeConfigs { - extraExclude = append(extraExclude, "tape.cfg", "tape-job.cfg", "media-pool.cfg", "tape-encryption-keys.json") - } - if !c.config.BackupNetworkConfigs { - extraExclude = append(extraExclude, "network.cfg") - } + if !c.config.BackupVerificationJobs { + extraExclude = append(extraExclude, "verification.cfg") + } + if !c.config.BackupTapeConfigs { + extraExclude = append(extraExclude, "tape.cfg", "tape-job.cfg", "media-pool.cfg", "tape-encryption-keys.json") + } + if !c.config.BackupPBSNetworkConfig { + extraExclude = append(extraExclude, "network.cfg") + } if !c.config.BackupPruneSchedules { extraExclude = append(extraExclude, "prune.cfg") } @@ -229,75 +255,75 @@ func (c *Collector) collectPBSDirectories(ctx context.Context, root string) erro c.logger.Info("Collecting PBS configuration files:") - // Datastore configuration - c.pbsManifest["datastore.cfg"] = c.collectPBSConfigFile(ctx, root, "datastore.cfg", - "Datastore configuration", c.config.BackupDatastoreConfigs) + // Datastore configuration + c.pbsManifest["datastore.cfg"] = c.collectPBSConfigFile(ctx, root, "datastore.cfg", + "Datastore configuration", c.config.BackupDatastoreConfigs, "BACKUP_DATASTORE_CONFIGS") - // S3 endpoint configuration (used by S3 datastores) - c.pbsManifest["s3.cfg"] = c.collectPBSConfigFile(ctx, root, "s3.cfg", - "S3 endpoints", c.config.BackupDatastoreConfigs) + // S3 endpoint configuration (used by S3 datastores) + c.pbsManifest["s3.cfg"] = 
c.collectPBSConfigFile(ctx, root, "s3.cfg", + "S3 endpoints", c.config.BackupDatastoreConfigs && c.config.BackupPBSS3Endpoints, "BACKUP_PBS_S3_ENDPOINTS") - // Node configuration (global PBS settings) - c.pbsManifest["node.cfg"] = c.collectPBSConfigFile(ctx, root, "node.cfg", - "Node configuration", true) + // Node configuration (global PBS settings) + c.pbsManifest["node.cfg"] = c.collectPBSConfigFile(ctx, root, "node.cfg", + "Node configuration", c.config.BackupPBSNodeConfig, "BACKUP_PBS_NODE_CONFIG") - // ACME configuration (accounts/plugins) - c.pbsManifest["acme/accounts.cfg"] = c.collectPBSConfigFile(ctx, root, filepath.Join("acme", "accounts.cfg"), - "ACME accounts", true) - c.pbsManifest["acme/plugins.cfg"] = c.collectPBSConfigFile(ctx, root, filepath.Join("acme", "plugins.cfg"), - "ACME plugins", true) + // ACME configuration (accounts/plugins) + c.pbsManifest["acme/accounts.cfg"] = c.collectPBSConfigFile(ctx, root, filepath.Join("acme", "accounts.cfg"), + "ACME accounts", c.config.BackupPBSAcmeAccounts, "BACKUP_PBS_ACME_ACCOUNTS") + c.pbsManifest["acme/plugins.cfg"] = c.collectPBSConfigFile(ctx, root, filepath.Join("acme", "plugins.cfg"), + "ACME plugins", c.config.BackupPBSAcmePlugins, "BACKUP_PBS_ACME_PLUGINS") - // External metric servers - c.pbsManifest["metricserver.cfg"] = c.collectPBSConfigFile(ctx, root, "metricserver.cfg", - "External metric servers", true) + // External metric servers + c.pbsManifest["metricserver.cfg"] = c.collectPBSConfigFile(ctx, root, "metricserver.cfg", + "External metric servers", c.config.BackupPBSMetricServers, "BACKUP_PBS_METRIC_SERVERS") - // Traffic control - c.pbsManifest["traffic-control.cfg"] = c.collectPBSConfigFile(ctx, root, "traffic-control.cfg", - "Traffic control rules", true) + // Traffic control + c.pbsManifest["traffic-control.cfg"] = c.collectPBSConfigFile(ctx, root, "traffic-control.cfg", + "Traffic control rules", c.config.BackupPBSTrafficControl, "BACKUP_PBS_TRAFFIC_CONTROL") - // User configuration - c.pbsManifest["user.cfg"] = c.collectPBSConfigFile(ctx, root, "user.cfg", - "User configuration", c.config.BackupUserConfigs) + // User configuration + c.pbsManifest["user.cfg"] = c.collectPBSConfigFile(ctx, root, "user.cfg", + "User configuration", c.config.BackupUserConfigs, "BACKUP_USER_CONFIGS") // ACL configuration (under user configs flag) c.pbsManifest["acl.cfg"] = c.collectPBSConfigFile(ctx, root, "acl.cfg", - "ACL configuration", c.config.BackupUserConfigs) + "ACL configuration", c.config.BackupUserConfigs, "BACKUP_USER_CONFIGS") // Remote configuration (for sync jobs) c.pbsManifest["remote.cfg"] = c.collectPBSConfigFile(ctx, root, "remote.cfg", - "Remote configuration", c.config.BackupRemoteConfigs) + "Remote configuration", c.config.BackupRemoteConfigs, "BACKUP_REMOTE_CONFIGS") // Sync jobs configuration c.pbsManifest["sync.cfg"] = c.collectPBSConfigFile(ctx, root, "sync.cfg", - "Sync jobs", c.config.BackupSyncJobs) + "Sync jobs", c.config.BackupSyncJobs, "BACKUP_SYNC_JOBS") // Verification jobs configuration c.pbsManifest["verification.cfg"] = c.collectPBSConfigFile(ctx, root, "verification.cfg", - "Verification jobs", c.config.BackupVerificationJobs) + "Verification jobs", c.config.BackupVerificationJobs, "BACKUP_VERIFICATION_JOBS") - // Tape backup configuration - c.pbsManifest["tape.cfg"] = c.collectPBSConfigFile(ctx, root, "tape.cfg", - "Tape configuration", c.config.BackupTapeConfigs) + // Tape backup configuration + c.pbsManifest["tape.cfg"] = c.collectPBSConfigFile(ctx, root, "tape.cfg", + "Tape configuration", 
c.config.BackupTapeConfigs, "BACKUP_TAPE_CONFIGS") - // Tape jobs (under tape configs flag) - c.pbsManifest["tape-job.cfg"] = c.collectPBSConfigFile(ctx, root, "tape-job.cfg", - "Tape jobs", c.config.BackupTapeConfigs) + // Tape jobs (under tape configs flag) + c.pbsManifest["tape-job.cfg"] = c.collectPBSConfigFile(ctx, root, "tape-job.cfg", + "Tape jobs", c.config.BackupTapeConfigs, "BACKUP_TAPE_CONFIGS") - // Media pool configuration (under tape configs flag) - c.pbsManifest["media-pool.cfg"] = c.collectPBSConfigFile(ctx, root, "media-pool.cfg", - "Media pool configuration", c.config.BackupTapeConfigs) + // Media pool configuration (under tape configs flag) + c.pbsManifest["media-pool.cfg"] = c.collectPBSConfigFile(ctx, root, "media-pool.cfg", + "Media pool configuration", c.config.BackupTapeConfigs, "BACKUP_TAPE_CONFIGS") - // Tape encryption keys (under tape configs flag) - c.pbsManifest["tape-encryption-keys.json"] = c.collectPBSConfigFile(ctx, root, "tape-encryption-keys.json", - "Tape encryption keys", c.config.BackupTapeConfigs) + // Tape encryption keys (under tape configs flag) + c.pbsManifest["tape-encryption-keys.json"] = c.collectPBSConfigFile(ctx, root, "tape-encryption-keys.json", + "Tape encryption keys", c.config.BackupTapeConfigs, "BACKUP_TAPE_CONFIGS") // Network configuration c.pbsManifest["network.cfg"] = c.collectPBSConfigFile(ctx, root, "network.cfg", - "Network configuration", c.config.BackupNetworkConfigs) + "Network configuration", c.config.BackupPBSNetworkConfig, "BACKUP_PBS_NETWORK_CONFIG") // Prune/GC schedules c.pbsManifest["prune.cfg"] = c.collectPBSConfigFile(ctx, root, "prune.cfg", - "Prune schedules", c.config.BackupPruneSchedules) + "Prune schedules", c.config.BackupPruneSchedules, "BACKUP_PRUNE_SCHEDULES") c.logger.Debug("PBS directory collection finished") return nil @@ -321,11 +347,13 @@ func (c *Collector) collectPBSCommands(ctx context.Context, datastores []pbsData } // Node configuration - c.safeCmdOutput(ctx, - "proxmox-backup-manager node show --output-format=json", - filepath.Join(commandsDir, "node_config.json"), - "Node configuration", - false) + if c.config.BackupPBSNodeConfig { + c.safeCmdOutput(ctx, + "proxmox-backup-manager node show --output-format=json", + filepath.Join(commandsDir, "node_config.json"), + "Node configuration", + false) + } // Datastore status if err := c.collectCommandMulti(ctx, @@ -348,7 +376,9 @@ func (c *Collector) collectPBSCommands(ctx context.Context, datastores []pbsData } // ACME (accounts, plugins) - c.collectPBSAcmeSnapshots(ctx, commandsDir) + if c.config.BackupPBSAcmeAccounts || c.config.BackupPBSAcmePlugins { + c.collectPBSAcmeSnapshots(ctx, commandsDir) + } // Notifications (targets, matchers, endpoints) c.collectPBSNotificationSnapshots(ctx, commandsDir) @@ -456,7 +486,7 @@ func (c *Collector) collectPBSCommands(ctx context.Context, datastores []pbsData } // Network configuration - if c.config.BackupNetworkConfigs { + if c.config.BackupPBSNetworkConfig { c.safeCmdOutput(ctx, "proxmox-backup-manager network list --output-format=json", filepath.Join(commandsDir, "network_list.json"), @@ -480,11 +510,13 @@ func (c *Collector) collectPBSCommands(ctx context.Context, datastores []pbsData } // Traffic control rules - c.safeCmdOutput(ctx, - "proxmox-backup-manager traffic-control list --output-format=json", - filepath.Join(commandsDir, "traffic_control.json"), - "Traffic control rules", - false) + if c.config.BackupPBSTrafficControl { + c.safeCmdOutput(ctx, + "proxmox-backup-manager traffic-control list 
--output-format=json", + filepath.Join(commandsDir, "traffic_control.json"), + "Traffic control rules", + false) + } // Task log summary c.safeCmdOutput(ctx, @@ -494,7 +526,9 @@ func (c *Collector) collectPBSCommands(ctx context.Context, datastores []pbsData false) // S3 endpoints (optional, may be unavailable on older PBS versions) - c.collectPBSS3Snapshots(ctx, commandsDir) + if c.config.BackupDatastoreConfigs && c.config.BackupPBSS3Endpoints { + c.collectPBSS3Snapshots(ctx, commandsDir) + } return nil } diff --git a/internal/backup/collector_pbs_extra_test.go b/internal/backup/collector_pbs_extra_test.go index 2144e42..d80dae4 100644 --- a/internal/backup/collector_pbs_extra_test.go +++ b/internal/backup/collector_pbs_extra_test.go @@ -219,6 +219,7 @@ func TestCollectPBSConfigsExcludesDisabledPBSConfigFiles(t *testing.T) { cfg.BackupTapeConfigs = false cfg.BackupPruneSchedules = false cfg.BackupNetworkConfigs = false + cfg.BackupPBSNetworkConfig = false cfg.BackupPxarFiles = false collector := NewCollectorWithDeps(newTestLogger(), cfg, t.TempDir(), types.ProxmoxBS, false, CollectorDeps{ @@ -283,7 +284,7 @@ func TestCollectPBSConfigFileReturnsSkippedWhenExcluded(t *testing.T) { collector := NewCollectorWithDeps(newTestLogger(), cfg, t.TempDir(), types.ProxmoxBS, false, CollectorDeps{}) - entry := collector.collectPBSConfigFile(context.Background(), root, "remote.cfg", "Remote configuration", true) + entry := collector.collectPBSConfigFile(context.Background(), root, "remote.cfg", "Remote configuration", true, "BACKUP_REMOTE_CONFIGS") if entry.Status != StatusSkipped { t.Fatalf("expected StatusSkipped, got %s", entry.Status) } diff --git a/internal/config/config.go b/internal/config/config.go index 91ecfbf..a4b250a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -200,11 +200,18 @@ type Config struct { // PBS-specific collection options BackupDatastoreConfigs bool + BackupPBSS3Endpoints bool + BackupPBSNodeConfig bool + BackupPBSAcmeAccounts bool + BackupPBSAcmePlugins bool + BackupPBSMetricServers bool + BackupPBSTrafficControl bool BackupUserConfigs bool BackupRemoteConfigs bool BackupSyncJobs bool BackupVerificationJobs bool BackupTapeConfigs bool + BackupPBSNetworkConfig bool BackupPruneSchedules bool BackupPxarFiles bool PxarDatastoreConcurrency int @@ -667,11 +674,19 @@ func (c *Config) parsePVESettings() error { func (c *Config) parsePBSSettings() { c.BackupDatastoreConfigs = c.getBool("BACKUP_DATASTORE_CONFIGS", true) + c.BackupPBSS3Endpoints = c.getBool("BACKUP_PBS_S3_ENDPOINTS", c.BackupDatastoreConfigs) + c.BackupPBSNodeConfig = c.getBool("BACKUP_PBS_NODE_CONFIG", true) + c.BackupPBSAcmeAccounts = c.getBool("BACKUP_PBS_ACME_ACCOUNTS", true) + c.BackupPBSAcmePlugins = c.getBool("BACKUP_PBS_ACME_PLUGINS", true) + c.BackupPBSMetricServers = c.getBool("BACKUP_PBS_METRIC_SERVERS", true) + c.BackupPBSTrafficControl = c.getBool("BACKUP_PBS_TRAFFIC_CONTROL", true) c.BackupUserConfigs = c.getBool("BACKUP_USER_CONFIGS", true) c.BackupRemoteConfigs = c.getBoolWithFallback([]string{"BACKUP_REMOTE_CONFIGS", "BACKUP_REMOTE_CFG"}, true) c.BackupSyncJobs = c.getBool("BACKUP_SYNC_JOBS", true) c.BackupVerificationJobs = c.getBool("BACKUP_VERIFICATION_JOBS", true) c.BackupTapeConfigs = c.getBool("BACKUP_TAPE_CONFIGS", true) + networkFallback := c.getBoolWithFallback([]string{"BACKUP_NETWORK_CONFIGS", "BACKUP_NETWORK_CONFIG"}, true) + c.BackupPBSNetworkConfig = c.getBool("BACKUP_PBS_NETWORK_CONFIG", networkFallback) c.BackupPruneSchedules = 
c.getBool("BACKUP_PRUNE_SCHEDULES", true) c.BackupPxarFiles = c.getBoolWithFallback([]string{"PXAR_SCAN_ENABLE", "BACKUP_PXAR_FILES"}, true) c.PxarDatastoreConcurrency = c.getInt("PXAR_SCAN_DS_CONCURRENCY", 3) diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index 98cbcd0..20cc63e 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -289,11 +289,18 @@ BACKUP_VM_CONFIGS=true # PBS BACKUP_DATASTORE_CONFIGS=true +BACKUP_PBS_S3_ENDPOINTS=true # s3.cfg (S3 endpoints, used by S3 datastores) +BACKUP_PBS_NODE_CONFIG=true # node.cfg (global PBS settings) +BACKUP_PBS_ACME_ACCOUNTS=true # acme/accounts.cfg +BACKUP_PBS_ACME_PLUGINS=true # acme/plugins.cfg +BACKUP_PBS_METRIC_SERVERS=true # metricserver.cfg +BACKUP_PBS_TRAFFIC_CONTROL=true # traffic-control.cfg BACKUP_USER_CONFIGS=true BACKUP_REMOTE_CONFIGS=true BACKUP_SYNC_JOBS=true BACKUP_VERIFICATION_JOBS=true BACKUP_TAPE_CONFIGS=true +BACKUP_PBS_NETWORK_CONFIG=true # network.cfg (PBS), independent from BACKUP_NETWORK_CONFIGS (system) BACKUP_PRUNE_SCHEDULES=true PXAR_SCAN_ENABLE=false PXAR_SCAN_DS_CONCURRENCY=3 # Number of datastores scanned in parallel for PXAR metadata diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index 1a4fceb..ab7a342 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=21807 +pid=71026 host=pve -time=2026-02-05T11:58:08+01:00 +time=2026-02-05T13:15:55+01:00 diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index 6bc4db6..ddab036 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -1476,11 +1476,18 @@ func applyCollectorOverrides(cc *backup.CollectorConfig, cfg *config.Config) { cc.CephConfigPath = cfg.CephConfigPath cc.BackupDatastoreConfigs = cfg.BackupDatastoreConfigs + cc.BackupPBSS3Endpoints = cfg.BackupPBSS3Endpoints + cc.BackupPBSNodeConfig = cfg.BackupPBSNodeConfig + cc.BackupPBSAcmeAccounts = cfg.BackupPBSAcmeAccounts + cc.BackupPBSAcmePlugins = cfg.BackupPBSAcmePlugins + cc.BackupPBSMetricServers = cfg.BackupPBSMetricServers + cc.BackupPBSTrafficControl = cfg.BackupPBSTrafficControl cc.BackupUserConfigs = cfg.BackupUserConfigs cc.BackupRemoteConfigs = cfg.BackupRemoteConfigs cc.BackupSyncJobs = cfg.BackupSyncJobs cc.BackupVerificationJobs = cfg.BackupVerificationJobs cc.BackupTapeConfigs = cfg.BackupTapeConfigs + cc.BackupPBSNetworkConfig = cfg.BackupPBSNetworkConfig cc.BackupPruneSchedules = cfg.BackupPruneSchedules cc.BackupPxarFiles = cfg.BackupPxarFiles From 1fff7b6ae8f6408d1cb0ad9c43eea45b906b4990 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 5 Feb 2026 14:32:49 +0100 Subject: [PATCH 4/9] Improve PVE manifest logging and cluster handling Refactor populatePVEManifest to always initialize the manifest and introduce manifestLogOpts + logEntry for consistent, descriptive logging and counting of missing files. Update record(...) calls to include descriptions, disable hints, and not-found counting for PVE config items. Rework collectPVEDirectories cluster logic to separate BackupClusterConfig checks from clustered/node-specific actions, use c.systemPath for authkey, and adjust copy/skip logging behavior. Tune log levels/messages for firewall, VZDump and Ceph detection. Add a note in the backup.env template explaining that enabled-but-unconfigured features emit warnings and can be disabled via BACKUP_* flags. 
--- internal/backup/collector_pve.go | 188 +++++++++++++++++++++------ internal/config/templates/backup.env | 1 + internal/orchestrator/.backup.lock | 4 +- 3 files changed, 151 insertions(+), 42 deletions(-) diff --git a/internal/backup/collector_pve.go b/internal/backup/collector_pve.go index 30b448a..f554f11 100644 --- a/internal/backup/collector_pve.go +++ b/internal/backup/collector_pve.go @@ -157,17 +157,63 @@ func (c *Collector) populatePVEManifest() { if c == nil || c.config == nil { return } - if c.pveManifest == nil { - c.pveManifest = make(map[string]ManifestEntry) + c.pveManifest = make(map[string]ManifestEntry) + + type manifestLogOpts struct { + description string + disableHint string + log bool + countNotFound bool } - record := func(src string, enabled bool) { + logEntry := func(opts manifestLogOpts, entry ManifestEntry) { + if !opts.log || strings.TrimSpace(opts.description) == "" { + return + } + switch entry.Status { + case StatusCollected: + if entry.Size > 0 { + c.logger.Info(" %s: collected (%s)", opts.description, FormatBytes(entry.Size)) + } else { + c.logger.Info(" %s: collected", opts.description) + } + case StatusDisabled: + if strings.TrimSpace(opts.disableHint) != "" { + c.logger.Info(" %s: disabled (%s=false)", opts.description, opts.disableHint) + } else { + c.logger.Info(" %s: disabled", opts.description) + } + case StatusSkipped: + c.logger.Info(" %s: skipped (excluded)", opts.description) + case StatusNotFound: + if strings.TrimSpace(opts.disableHint) != "" { + c.logger.Warning(" %s: not configured. If unused, set %s=false to disable.", opts.description, opts.disableHint) + } else { + c.logger.Warning(" %s: not configured", opts.description) + } + case StatusFailed: + if strings.TrimSpace(entry.Error) != "" { + c.logger.Warning(" %s: failed - %s", opts.description, entry.Error) + } else { + c.logger.Warning(" %s: failed", opts.description) + } + default: + c.logger.Warning(" %s: failed - unknown status %s", opts.description, entry.Status) + } + } + + record := func(src string, enabled bool, opts manifestLogOpts) { if src == "" { return } dest := c.targetPathFor(src) key := pveManifestKey(c.tempDir, dest) - c.pveManifest[key] = c.describePathForManifest(src, dest, enabled) + entry := c.describePathForManifest(src, dest, enabled) + c.pveManifest[key] = entry + if opts.countNotFound && entry.Status == StatusNotFound { + c.incFilesNotFound() + } + logEntry(opts, entry) } pveConfigPath := c.effectivePVEConfigPath() @@ -175,12 +221,29 @@ func (c *Collector) populatePVEManifest() { return } + c.logger.Info("Collecting PVE configuration files:") + // VM/CT configuration directories. - record(filepath.Join(pveConfigPath, "qemu-server"), c.config.BackupVMConfigs) - record(filepath.Join(pveConfigPath, "lxc"), c.config.BackupVMConfigs) + record(filepath.Join(pveConfigPath, "qemu-server"), c.config.BackupVMConfigs, manifestLogOpts{ + description: "VM configurations", + disableHint: "BACKUP_VM_CONFIGS", + log: true, + countNotFound: true, + }) + record(filepath.Join(pveConfigPath, "lxc"), c.config.BackupVMConfigs, manifestLogOpts{ + description: "Container configurations", + disableHint: "BACKUP_VM_CONFIGS", + log: true, + countNotFound: true, + }) // Firewall configuration. 
- record(filepath.Join(pveConfigPath, "firewall"), c.config.BackupPVEFirewall) + record(filepath.Join(pveConfigPath, "firewall"), c.config.BackupPVEFirewall, manifestLogOpts{ + description: "Firewall configuration", + disableHint: "BACKUP_PVE_FIREWALL", + log: true, + countNotFound: true, + }) if c.config.BackupPVEFirewall { nodesDir := filepath.Join(pveConfigPath, "nodes") if entries, err := os.ReadDir(nodesDir); err == nil { @@ -192,24 +255,64 @@ func (c *Collector) populatePVEManifest() { if node == "" { continue } - record(filepath.Join(nodesDir, node, "host.fw"), true) + record(filepath.Join(nodesDir, node, "host.fw"), true, manifestLogOpts{log: false}) } } } // ACL configuration. - record(filepath.Join(pveConfigPath, "user.cfg"), c.config.BackupPVEACL) - record(filepath.Join(pveConfigPath, "acl.cfg"), c.config.BackupPVEACL) - record(filepath.Join(pveConfigPath, "domains.cfg"), c.config.BackupPVEACL) + record(filepath.Join(pveConfigPath, "user.cfg"), c.config.BackupPVEACL, manifestLogOpts{ + description: "User configuration", + disableHint: "BACKUP_PVE_ACL", + log: true, + countNotFound: true, + }) + record(filepath.Join(pveConfigPath, "acl.cfg"), c.config.BackupPVEACL, manifestLogOpts{ + description: "ACL configuration", + disableHint: "BACKUP_PVE_ACL", + log: true, + countNotFound: true, + }) + record(filepath.Join(pveConfigPath, "domains.cfg"), c.config.BackupPVEACL, manifestLogOpts{ + description: "Domain configuration", + disableHint: "BACKUP_PVE_ACL", + log: true, + countNotFound: true, + }) // Scheduled jobs. - record(filepath.Join(pveConfigPath, "jobs.cfg"), c.config.BackupPVEJobs) - record(filepath.Join(pveConfigPath, "vzdump.cron"), c.config.BackupPVEJobs) + record(filepath.Join(pveConfigPath, "jobs.cfg"), c.config.BackupPVEJobs, manifestLogOpts{ + description: "Job configuration", + disableHint: "BACKUP_PVE_JOBS", + log: true, + countNotFound: true, + }) + record(filepath.Join(pveConfigPath, "vzdump.cron"), c.config.BackupPVEJobs, manifestLogOpts{ + description: "VZDump cron", + disableHint: "BACKUP_PVE_JOBS", + log: true, + countNotFound: true, + }) // Cluster configuration. - record(c.effectiveCorosyncConfigPath(), c.config.BackupClusterConfig) - record(filepath.Join(c.effectivePVEClusterPath(), "config.db"), c.config.BackupClusterConfig) - record("/etc/corosync/authkey", c.config.BackupClusterConfig) + record(c.effectiveCorosyncConfigPath(), c.config.BackupClusterConfig, manifestLogOpts{ + description: "Corosync configuration", + disableHint: "BACKUP_CLUSTER_CONFIG", + log: true, + countNotFound: true, + }) + record(filepath.Join(c.effectivePVEClusterPath(), "config.db"), c.config.BackupClusterConfig, manifestLogOpts{ + description: "PVE cluster database", + disableHint: "BACKUP_CLUSTER_CONFIG", + log: true, + countNotFound: true, + }) + record(c.systemPath("/etc/corosync/authkey"), c.config.BackupClusterConfig, manifestLogOpts{ + description: "Corosync authkey", + disableHint: "BACKUP_CLUSTER_CONFIG", + log: true, + countNotFound: true, + }) // VZDump configuration. 
vzdumpPath := c.config.VzdumpConfigPath @@ -218,7 +321,12 @@ func (c *Collector) populatePVEManifest() { } else if !filepath.IsAbs(vzdumpPath) { vzdumpPath = filepath.Join(pveConfigPath, vzdumpPath) } - record(vzdumpPath, c.config.BackupVZDumpConfig) + record(vzdumpPath, c.config.BackupVZDumpConfig, manifestLogOpts{ + description: "VZDump configuration", + disableHint: "BACKUP_VZDUMP_CONFIG", + log: true, + countNotFound: true, + }) } func pveManifestKey(tempDir, dest string) string { @@ -310,7 +418,7 @@ func (c *Collector) collectPVEDirectories(ctx context.Context, clustered bool) e // Cluster configuration (if clustered) clusterPath := c.effectivePVEClusterPath() - if c.config.BackupClusterConfig && clustered { + if c.config.BackupClusterConfig { corosyncPath := c.config.CorosyncConfigPath if corosyncPath == "" { corosyncPath = filepath.Join(pveConfigPath, "corosync.conf") @@ -324,31 +432,31 @@ func (c *Collector) collectPVEDirectories(ctx context.Context, clustered bool) e c.logger.Warning("Failed to copy corosync.conf: %v", err) } - authkeySrc := "/etc/corosync/authkey" + if clustered { + // Cluster directory + if err := c.safeCopyDir(ctx, + clusterPath, + c.targetPathFor(clusterPath), + "PVE cluster data"); err != nil { + c.logger.Warning("Failed to copy cluster data: %v", err) + } + } else { + c.logger.Debug("PVE cluster not configured (single node) - skipping cluster data directory snapshot") + } + } else { + c.logger.Skip("PVE cluster backup disabled") + c.logger.Skip("Corosync configuration") + } + + if c.config.BackupClusterConfig { + authkeySrc := c.systemPath("/etc/corosync/authkey") if err := c.safeCopyFile(ctx, authkeySrc, c.targetPathFor(authkeySrc), - "Corosync authkey"); err != nil && !errors.Is(err, os.ErrNotExist) { + "Corosync authkey"); err != nil { c.logger.Warning("Failed to copy Corosync authkey: %v", err) } - // Cluster directory - if err := c.safeCopyDir(ctx, - clusterPath, - c.targetPathFor(clusterPath), - "PVE cluster data"); err != nil { - c.logger.Warning("Failed to copy cluster data: %v", err) - } - } else { - if !c.config.BackupClusterConfig { - c.logger.Skip("PVE cluster backup disabled") - c.logger.Skip("Corosync configuration") - } else { - c.logger.Info("PVE cluster not configured (single node) - skipping Corosync configuration") - } - } - - if c.config.BackupClusterConfig { // Always attempt to capture config.db even on standalone nodes when cluster config is enabled. 
configDB := filepath.Join(clusterPath, "config.db") if info, err := os.Stat(configDB); err == nil && !info.IsDir() { @@ -384,7 +492,7 @@ func (c *Collector) collectPVEDirectories(ctx context.Context, clustered bool) e } } } else if errors.Is(err, os.ErrNotExist) { - c.logger.Info("PVE firewall configuration not found (no rules configured) - skipping") + c.logger.Debug("PVE firewall configuration not found (no rules configured) - skipping") } else { c.logger.Warning("Failed to access firewall configuration %s: %v", firewallSrc, err) } @@ -405,7 +513,7 @@ func (c *Collector) collectPVEDirectories(ctx context.Context, clustered bool) e vzdumpPath, c.targetPathFor(vzdumpPath), "VZDump configuration"); err != nil { - c.logger.Debug("No vzdump.conf found") + c.logger.Warning("Failed to copy VZDump configuration: %v", err) } } else { c.logger.Skip("VZDump configuration backup disabled.") @@ -1313,7 +1421,7 @@ func (c *Collector) collectPVECephInfo(ctx context.Context) error { } if !c.isCephConfigured(ctx) { - c.logger.Debug("Ceph not detected on this node, skipping Ceph collection") + c.logger.Warning("Skipping Ceph collection: not detected. If unused, set BACKUP_CEPH_CONFIG=false to disable.") return nil } diff --git a/internal/config/templates/backup.env b/internal/config/templates/backup.env index 20cc63e..9343fb6 100644 --- a/internal/config/templates/backup.env +++ b/internal/config/templates/backup.env @@ -272,6 +272,7 @@ METRICS_PATH=${BASE_DIR}/metrics # PVE # BACKUP_CLUSTER_CONFIG controls both cluster config collection and cluster runtime commands # (corosync.conf / pve-cluster data, pvecm status/nodes, and HA status collection). +# NOTE: If a feature is enabled but not configured on this node, ProxSave logs a WARNING (exit code 1). Disable unused features via the BACKUP_* flags below. BACKUP_CLUSTER_CONFIG=true BACKUP_PVE_FIREWALL=true BACKUP_VZDUMP_CONFIG=true diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index ab7a342..1b77ac4 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=71026 +pid=99222 host=pve -time=2026-02-05T13:15:55+01:00 +time=2026-02-05T13:58:24+01:00 From 8d76e0681e56063c8151017bba5deb649eb1408d Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 5 Feb 2026 16:15:35 +0100 Subject: [PATCH 5/9] Remove BASE_DIR & CRON_* from backup.env Stop writing BASE_DIR and CRON_* values into backup.env since BASE_DIR is now auto-detected and cron scheduling is managed via crontab. Changes include: - CLI and TUI installers no longer set BASE_DIR or CRON_* in generated env files; added unsetEnvValue helper to strip keys from templates. - Config upgrade now prunes deprecated keys (BASE_DIR, CRON_SCHEDULE, CRON_HOUR, CRON_MINUTE), emits a warning, and only rewrites the file when there are missing keys or pruned lines. - Added unit tests to verify pruning behavior and updated TUI tests to expect these keys to be removed. - Documentation updated to note BASE_DIR is auto-detected. - Minor update to internal/orchestrator/.backup.lock (runtime metadata). 
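A sketch of the intended effect on an existing backup.env (example values taken
from the new unit tests; all other keys are left untouched):

    # before upgrade
    BASE_DIR=/opt/proxsave
    CRON_SCHEDULE=00 02 * * *
    BACKUP_CLUSTER_CONFIG=true

    # after upgrade: the BASE_DIR and CRON_* lines are removed and a warning is
    # emitted, e.g.
    #   Removed deprecated keys from backup.env: BASE_DIR, CRON_SCHEDULE
    #   (BASE_DIR is auto-detected; cron is managed via crontab)
    BACKUP_CLUSTER_CONFIG=true
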
--- cmd/proxsave/install.go | 4 -- docs/CONFIGURATION.md | 5 +- internal/config/upgrade.go | 52 +++++++++++++- internal/config/upgrade_test.go | 103 ++++++++++++++++++++++++++++ internal/orchestrator/.backup.lock | 4 +- internal/tui/wizard/install.go | 56 +++++++++++---- internal/tui/wizard/install_test.go | 14 ++-- 7 files changed, 207 insertions(+), 31 deletions(-) diff --git a/cmd/proxsave/install.go b/cmd/proxsave/install.go index d52e6dd..d95e881 100644 --- a/cmd/proxsave/install.go +++ b/cmd/proxsave/install.go @@ -324,10 +324,6 @@ func runConfigWizardCLI(ctx context.Context, reader *bufio.Reader, configPath, t return false, false, wrapInstallError(err) } - // Ensure BASE_DIR is explicitly present in the generated env file so that - // subsequent runs and encryption setup use the same root directory. - template = setEnvValue(template, "BASE_DIR", baseDir) - logging.DebugStepBootstrap(bootstrap, "install config wizard (cli)", "writing configuration") if err := writeConfigFile(configPath, tmpConfigPath, template); err != nil { return false, false, err diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 868c47f..1c821fb 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -213,8 +213,9 @@ MIN_DISK_SPACE_CLOUD_GB=1 # Cloud storage (not enforced for remote) ## Storage Paths ```bash -# Base directory for all operations -BASE_DIR=/opt/proxsave +# Base directory for all operations (auto-detected at runtime) +# BASE_DIR is derived from the executable/config location, so it is usually not +# written in backup.env. # Lock file directory LOCK_PATH=${BASE_DIR}/lock diff --git a/internal/config/upgrade.go b/internal/config/upgrade.go index 5db19e7..755f12c 100644 --- a/internal/config/upgrade.go +++ b/internal/config/upgrade.go @@ -173,6 +173,47 @@ func computeConfigUpgrade(configPath string) (*UpgradeResult, string, []byte, er return result, "", originalContent, fmt.Errorf("failed to parse config %s: %w", configPath, err) } + // Prune deprecated keys that are now auto-detected at runtime. + // + // BASE_DIR is derived from the executable/config location. + // CRON_* scheduling is managed via crontab, not backup.env. + deprecatedUpperKeys := map[string]struct{}{ + "BASE_DIR": {}, + "CRON_SCHEDULE": {}, + "CRON_HOUR": {}, + "CRON_MINUTE": {}, + } + skipOriginalLines := make([]bool, len(originalLines)) + prunedLineCount := 0 + prunedKeys := make(map[string]struct{}) + for key, ranges := range userRanges { + upperKey := strings.ToUpper(key) + if _, ok := deprecatedUpperKeys[upperKey]; !ok { + continue + } + prunedKeys[upperKey] = struct{}{} + for _, r := range ranges { + for i := r.start; i <= r.end && i < len(skipOriginalLines); i++ { + if i < 0 { + continue + } + if skipOriginalLines[i] { + continue + } + skipOriginalLines[i] = true + prunedLineCount++ + } + } + } + if len(prunedKeys) > 0 { + keys := make([]string, 0, len(prunedKeys)) + for k := range prunedKeys { + keys = append(keys, k) + } + sort.Strings(keys) + warnings = append(warnings, fmt.Sprintf("Removed deprecated keys from backup.env: %s (BASE_DIR is auto-detected; cron is managed via crontab)", strings.Join(keys, ", "))) + } + // 2. Walk the template line-by-line and collect template entries. 
template := DefaultEnvTemplate() normalizedTemplate := strings.ReplaceAll(template, "\r\n", "\n") @@ -252,6 +293,10 @@ func computeConfigUpgrade(configPath string) (*UpgradeResult, string, []byte, er caseConflictKeys := make([]string, 0) for _, key := range userKeyOrder { upperKey := strings.ToUpper(key) + if _, ok := deprecatedUpperKeys[upperKey]; ok { + // This key is explicitly pruned and should not be reported as preserved. + continue + } templateKey, ok := templateKeyByUpper[upperKey] if !ok { extraKeys = append(extraKeys, key) @@ -277,8 +322,8 @@ func computeConfigUpgrade(configPath string) (*UpgradeResult, string, []byte, er } } - // If nothing is missing, do not rewrite the file. - if len(missingKeys) == 0 { + // If nothing is missing and nothing is pruned, do not rewrite the file. + if len(missingKeys) == 0 && prunedLineCount == 0 { result.Changed = false result.Warnings = warnings result.MissingKeys = missingKeys @@ -384,6 +429,9 @@ func computeConfigUpgrade(configPath string) (*UpgradeResult, string, []byte, er newLines = append(newLines, ops[opIdx].lines...) opIdx++ } + if i < len(skipOriginalLines) && skipOriginalLines[i] { + continue + } newLines = append(newLines, originalLines[i]) } for opIdx < len(ops) { diff --git a/internal/config/upgrade_test.go b/internal/config/upgrade_test.go index e85d617..467e2e8 100644 --- a/internal/config/upgrade_test.go +++ b/internal/config/upgrade_test.go @@ -114,6 +114,109 @@ func TestUpgradeConfigCreatesBackupAndPreservesExtraKeys(t *testing.T) { }) } +func TestUpgradeConfigPrunesBaseDir(t *testing.T) { + template := "KEY1=default\n" + withTemplate(t, template, func() { + tmpDir := t.TempDir() + configPath := filepath.Join(tmpDir, "backup.env") + content := "BASE_DIR=/opt/proxsave\nKEY1=custom\n" + if err := os.WriteFile(configPath, []byte(content), 0600); err != nil { + t.Fatalf("failed to seed config: %v", err) + } + + plan, err := PlanUpgradeConfigFile(configPath) + if err != nil { + t.Fatalf("PlanUpgradeConfigFile returned error: %v", err) + } + if !plan.Changed { + t.Fatal("expected result.Changed=true when BASE_DIR is pruned") + } + if len(plan.MissingKeys) != 0 { + t.Fatalf("MissingKeys = %v; want []", plan.MissingKeys) + } + if len(plan.ExtraKeys) != 0 { + t.Fatalf("ExtraKeys = %v; want [] (BASE_DIR should be pruned, not preserved)", plan.ExtraKeys) + } + warnings := strings.Join(plan.Warnings, "\n") + if !strings.Contains(warnings, "BASE_DIR") { + t.Fatalf("expected warnings to mention BASE_DIR pruning, got: %s", warnings) + } + + result, err := UpgradeConfigFile(configPath) + if err != nil { + t.Fatalf("UpgradeConfigFile returned error: %v", err) + } + if !result.Changed { + t.Fatal("expected result.Changed=true when BASE_DIR is pruned") + } + if result.BackupPath == "" { + t.Fatal("expected BackupPath to be populated after pruning") + } + + data, err := os.ReadFile(configPath) + if err != nil { + t.Fatalf("failed to read upgraded config: %v", err) + } + updated := string(data) + if strings.Contains(updated, "BASE_DIR=") { + t.Fatalf("expected BASE_DIR to be removed, got:\n%s", updated) + } + if !strings.Contains(updated, "KEY1=custom") { + t.Fatalf("expected KEY1 preserved, got:\n%s", updated) + } + }) +} + +func TestUpgradeConfigPrunesCronKeys(t *testing.T) { + template := "KEY1=default\n" + withTemplate(t, template, func() { + tmpDir := t.TempDir() + configPath := filepath.Join(tmpDir, "backup.env") + content := "CRON_SCHEDULE=00 02 * * *\nCRON_HOUR=02\nCRON_MINUTE=00\nKEY1=custom\n" + if err := os.WriteFile(configPath, 
[]byte(content), 0600); err != nil { + t.Fatalf("failed to seed config: %v", err) + } + + plan, err := PlanUpgradeConfigFile(configPath) + if err != nil { + t.Fatalf("PlanUpgradeConfigFile returned error: %v", err) + } + if !plan.Changed { + t.Fatal("expected result.Changed=true when CRON_* keys are pruned") + } + if len(plan.MissingKeys) != 0 { + t.Fatalf("MissingKeys = %v; want []", plan.MissingKeys) + } + if len(plan.ExtraKeys) != 0 { + t.Fatalf("ExtraKeys = %v; want [] (CRON_* should be pruned, not preserved)", plan.ExtraKeys) + } + warnings := strings.Join(plan.Warnings, "\n") + if !strings.Contains(warnings, "CRON_SCHEDULE") { + t.Fatalf("expected warnings to mention CRON_* pruning, got: %s", warnings) + } + + result, err := UpgradeConfigFile(configPath) + if err != nil { + t.Fatalf("UpgradeConfigFile returned error: %v", err) + } + if !result.Changed { + t.Fatal("expected result.Changed=true when CRON_* keys are pruned") + } + + data, err := os.ReadFile(configPath) + if err != nil { + t.Fatalf("failed to read upgraded config: %v", err) + } + updated := string(data) + if strings.Contains(updated, "CRON_SCHEDULE=") || strings.Contains(updated, "CRON_HOUR=") || strings.Contains(updated, "CRON_MINUTE=") { + t.Fatalf("expected CRON_* keys to be removed, got:\n%s", updated) + } + if !strings.Contains(updated, "KEY1=custom") { + t.Fatalf("expected KEY1 preserved, got:\n%s", updated) + } + }) +} + func TestPlanUpgradeEmptyPath(t *testing.T) { if _, err := PlanUpgradeConfigFile(" "); err == nil { t.Fatal("expected error for empty config path") diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index 1b77ac4..f9db8bd 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=99222 +pid=146756 host=pve -time=2026-02-05T13:58:24+01:00 +time=2026-02-05T15:57:18+01:00 diff --git a/internal/tui/wizard/install.go b/internal/tui/wizard/install.go index e82c220..f1999ca 100644 --- a/internal/tui/wizard/install.go +++ b/internal/tui/wizard/install.go @@ -458,8 +458,12 @@ func ApplyInstallData(baseTemplate string, data *InstallWizardData) (string, err template = config.DefaultEnvTemplate() } - // Apply BASE_DIR - template = setEnvValue(template, "BASE_DIR", data.BaseDir) + // BASE_DIR is auto-detected at runtime from the executable/config location. + // Keep it out of backup.env to avoid pinning the installation to a specific path. 
+ template = unsetEnvValue(template, "BASE_DIR") + template = unsetEnvValue(template, "CRON_SCHEDULE") + template = unsetEnvValue(template, "CRON_HOUR") + template = unsetEnvValue(template, "CRON_MINUTE") // Apply secondary storage if data.EnableSecondaryStorage { @@ -506,19 +510,6 @@ func ApplyInstallData(baseTemplate string, data *InstallWizardData) (string, err template = setEnvValue(template, "EMAIL_ENABLED", "false") } - // Apply cron schedule - cron := strings.TrimSpace(data.CronTime) - if cron == "" { - cron = "02:00" - } - if parts := strings.Split(cron, ":"); len(parts) == 2 { - min := strings.TrimSpace(parts[1]) - hr := strings.TrimSpace(parts[0]) - template = setEnvValue(template, "CRON_SCHEDULE", fmt.Sprintf("%s %s * * *", min, hr)) - template = setEnvValue(template, "CRON_HOUR", hr) - template = setEnvValue(template, "CRON_MINUTE", min) - } - // Apply encryption if data.EnableEncryption { template = setEnvValue(template, "ENCRYPT_ARCHIVE", "true") @@ -534,6 +525,41 @@ func setEnvValue(template, key, value string) string { return utils.SetEnvValue(template, key, value) } +func unsetEnvValue(template, key string) string { + key = strings.TrimSpace(key) + if key == "" { + return template + } + + lines := strings.Split(template, "\n") + out := make([]string, 0, len(lines)) + + for _, line := range lines { + trimmed := strings.TrimSpace(line) + if utils.IsComment(trimmed) { + out = append(out, line) + continue + } + + parts := strings.SplitN(trimmed, "=", 2) + if len(parts) != 2 { + out = append(out, line) + continue + } + + parsedKey := strings.TrimSpace(parts[0]) + if fields := strings.Fields(parsedKey); len(fields) >= 2 && fields[0] == "export" { + parsedKey = fields[1] + } + if strings.EqualFold(parsedKey, key) { + continue + } + out = append(out, line) + } + + return strings.Join(out, "\n") +} + // CheckExistingConfig checks if config file exists and asks how to proceed func CheckExistingConfig(configPath string, buildSig string) (ExistingConfigAction, error) { if _, err := os.Stat(configPath); err == nil { diff --git a/internal/tui/wizard/install_test.go b/internal/tui/wizard/install_test.go index 0b94b8d..4a7c960 100644 --- a/internal/tui/wizard/install_test.go +++ b/internal/tui/wizard/install_test.go @@ -74,7 +74,9 @@ func TestApplyInstallDataRespectsBaseTemplate(t *testing.T) { } assertContains("MARKER", "1") - assertContains("BASE_DIR", data.BaseDir) + if strings.Contains(result, "BASE_DIR=") { + t.Fatalf("expected BASE_DIR to be removed, got:\n%s", result) + } assertContains("SECONDARY_ENABLED", "true") assertContains("SECONDARY_PATH", data.SecondaryPath) assertContains("SECONDARY_LOG_PATH", data.SecondaryLogPath) @@ -95,8 +97,8 @@ func TestApplyInstallDataDefaultsBaseTemplate(t *testing.T) { if err != nil { t.Fatalf("ApplyInstallData returned error: %v", err) } - if !strings.Contains(result, "BASE_DIR="+data.BaseDir) { - t.Fatalf("expected BASE_DIR to be set in default template") + if strings.Contains(result, "BASE_DIR=") { + t.Fatalf("expected BASE_DIR not to be written in default template") } } @@ -124,9 +126,9 @@ func TestApplyInstallDataCronAndNotifications(t *testing.T) { assertContains("TELEGRAM_ENABLED", "false") assertContains("EMAIL_ENABLED", "true") assertContains("EMAIL_DELIVERY_METHOD", "relay") - assertContains("CRON_SCHEDULE", "7 3 * * *") - assertContains("CRON_HOUR", "3") - assertContains("CRON_MINUTE", "7") + if strings.Contains(result, "CRON_SCHEDULE=") || strings.Contains(result, "CRON_HOUR=") || strings.Contains(result, "CRON_MINUTE=") { + 
t.Fatalf("expected CRON_* keys to be removed from backup.env, got:\n%s", result) + } assertContains("ENCRYPT_ARCHIVE", "false") } From 9ca4126aeee351d434aa98b56b6b7507b1249913 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 5 Feb 2026 17:43:47 +0100 Subject: [PATCH 6/9] Improve network IP parsing and rollback UI Enhance network snapshot IP extraction to return the primary address with CIDR and handle token parsing/whitespace robustly, preferring IPv4 when available. Add unit tests covering IPv4/IPv6/CIDR cases and missing snapshots. Introduce a suppressPVEChecks flag for cluster restores to skip PVE-specific health checks, adjust network health options accordingly, and enrich UI messages with observed/original IP and reconnect host guidance. Update applyNetworkWithRollbackWithUI signature and return the constructed NetworkApplyNotCommittedError for richer reporting. Minor test call adjustment and backup lock timestamp update. --- internal/orchestrator/.backup.lock | 4 +- internal/orchestrator/network_apply.go | 19 +++-- .../network_apply_preflight_rollback_test.go | 1 + .../network_apply_snapshot_test.go | 73 +++++++++++++++++++ .../orchestrator/network_apply_workflow_ui.go | 70 ++++++++++++++++-- internal/orchestrator/restore_workflow_ui.go | 34 +++++++-- 6 files changed, 179 insertions(+), 22 deletions(-) create mode 100644 internal/orchestrator/network_apply_snapshot_test.go diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index f9db8bd..e077879 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=146756 +pid=176534 host=pve -time=2026-02-05T15:57:18+01:00 +time=2026-02-05T17:39:25+01:00 diff --git a/internal/orchestrator/network_apply.go b/internal/orchestrator/network_apply.go index ad1b2e9..7f33fe3 100644 --- a/internal/orchestrator/network_apply.go +++ b/internal/orchestrator/network_apply.go @@ -68,7 +68,8 @@ func shouldAttemptNetworkApply(plan *RestorePlan) bool { return plan.HasCategoryID("network") } -// extractIPFromSnapshot reads the IP address for a given interface from a network snapshot report file. +// extractIPFromSnapshot reads the primary address (including CIDR) for a given interface +// from a network snapshot report file. // It searches the output section that follows the "$ ip -br addr" command written by writeNetworkSnapshot. func extractIPFromSnapshot(path, iface string) string { if strings.TrimSpace(path) == "" || strings.TrimSpace(iface) == "" { @@ -105,16 +106,24 @@ func extractIPFromSnapshot(path, iface string) string { // "ip -br addr" can print multiple addresses; prefer IPv4 when available. 
firstIPv6 := "" for _, token := range fields[2:] { - ip := strings.Split(token, "/")[0] - parsed := net.ParseIP(ip) + token = strings.TrimSpace(token) + if token == "" { + continue + } + + ipPart := token + if strings.Contains(ipPart, "/") { + ipPart = strings.SplitN(ipPart, "/", 2)[0] + } + parsed := net.ParseIP(ipPart) if parsed == nil { continue } if parsed.To4() != nil { - return ip + return token } if firstIPv6 == "" { - firstIPv6 = ip + firstIPv6 = token } } if firstIPv6 != "" { diff --git a/internal/orchestrator/network_apply_preflight_rollback_test.go b/internal/orchestrator/network_apply_preflight_rollback_test.go index d5e56ad..dcfbaaf 100644 --- a/internal/orchestrator/network_apply_preflight_rollback_test.go +++ b/internal/orchestrator/network_apply_preflight_rollback_test.go @@ -65,6 +65,7 @@ func TestApplyNetworkWithRollbackWithUI_RollsBackFilesOnPreflightFailure(t *test "", defaultNetworkRollbackTimeout, SystemTypePBS, + false, ) if err == nil || !strings.Contains(err.Error(), "network preflight validation failed") { t.Fatalf("expected preflight error, got %v", err) diff --git a/internal/orchestrator/network_apply_snapshot_test.go b/internal/orchestrator/network_apply_snapshot_test.go new file mode 100644 index 0000000..a4c5cfd --- /dev/null +++ b/internal/orchestrator/network_apply_snapshot_test.go @@ -0,0 +1,73 @@ +package orchestrator + +import ( + "os" + "testing" +) + +func TestExtractIPFromSnapshotPrefersIPv4WithCIDR(t *testing.T) { + origFS := restoreFS + t.Cleanup(func() { restoreFS = origFS }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + + snapshot := ` +GeneratedAt: 2026-02-05T16:41:49Z +Label: before + +=== LIVE NETWORK STATE === + +$ ip -br addr +vmbr0 UP 192.168.178.146/24 2a01:db8::1/64 +lo UNKNOWN 127.0.0.1/8 ::1/128 + +$ ip route show +default via 192.168.178.1 dev vmbr0 +` + if err := fakeFS.WriteFile("/snap.txt", []byte(snapshot), 0o600); err != nil { + t.Fatalf("write snapshot: %v", err) + } + + got := extractIPFromSnapshot("/snap.txt", "vmbr0") + if got != "192.168.178.146/24" { + t.Fatalf("got %q want %q", got, "192.168.178.146/24") + } +} + +func TestExtractIPFromSnapshotFallsBackToIPv6WithCIDR(t *testing.T) { + origFS := restoreFS + t.Cleanup(func() { restoreFS = origFS }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + + snapshot := ` +$ ip -br addr +vmbr0 UP 2a01:db8::2/64 +` + if err := fakeFS.WriteFile("/snap.txt", []byte(snapshot), 0o600); err != nil { + t.Fatalf("write snapshot: %v", err) + } + + got := extractIPFromSnapshot("/snap.txt", "vmbr0") + if got != "2a01:db8::2/64" { + t.Fatalf("got %q want %q", got, "2a01:db8::2/64") + } +} + +func TestExtractIPFromSnapshotReturnsUnknownWhenMissing(t *testing.T) { + origFS := restoreFS + t.Cleanup(func() { restoreFS = origFS }) + + fakeFS := NewFakeFS() + t.Cleanup(func() { _ = os.RemoveAll(fakeFS.Root) }) + restoreFS = fakeFS + + got := extractIPFromSnapshot("/does-not-exist.txt", "vmbr0") + if got != "unknown" { + t.Fatalf("got %q want %q", got, "unknown") + } +} diff --git a/internal/orchestrator/network_apply_workflow_ui.go b/internal/orchestrator/network_apply_workflow_ui.go index b8a3924..14e931b 100644 --- a/internal/orchestrator/network_apply_workflow_ui.go +++ b/internal/orchestrator/network_apply_workflow_ui.go @@ -172,22 +172,27 @@ func maybeApplyNetworkConfigWithUI(ctx context.Context, ui RestoreWorkflowUI, lo logging.DebugStep(logger, "network safe apply (ui)", "Selected 
rollback backup: %s", rollbackPath) systemType := SystemTypeUnknown + suppressPVEChecks := false if plan != nil { systemType = plan.SystemType + // In cluster RECOVERY restores, PVE services are intentionally stopped and /etc/pve is unmounted + // until the end of the workflow. PVE UI (8006) and corosync/quorum checks are not meaningful here. + suppressPVEChecks = plan.SystemType == SystemTypePVE && plan.NeedsClusterRestore } - return applyNetworkWithRollbackWithUI(ctx, ui, logger, rollbackPath, networkRollbackPath, stageRoot, archivePath, defaultNetworkRollbackTimeout, systemType) + return applyNetworkWithRollbackWithUI(ctx, ui, logger, rollbackPath, networkRollbackPath, stageRoot, archivePath, defaultNetworkRollbackTimeout, systemType, suppressPVEChecks) } -func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, logger *logging.Logger, rollbackBackupPath, networkRollbackPath, stageRoot, archivePath string, timeout time.Duration, systemType SystemType) (err error) { +func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, logger *logging.Logger, rollbackBackupPath, networkRollbackPath, stageRoot, archivePath string, timeout time.Duration, systemType SystemType, suppressPVEChecks bool) (err error) { done := logging.DebugStart( logger, "network safe apply (ui)", - "rollbackBackup=%s networkRollback=%s timeout=%s systemType=%s stage=%s", + "rollbackBackup=%s networkRollback=%s timeout=%s systemType=%s stage=%s suppressPVEChecks=%v", strings.TrimSpace(rollbackBackupPath), strings.TrimSpace(networkRollbackPath), timeout, systemType, strings.TrimSpace(stageRoot), + suppressPVEChecks, ) defer func() { done(err) }() @@ -401,7 +406,7 @@ func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, l } logging.DebugStep(logger, "network safe apply (ui)", "Run post-apply health checks") - health := runNetworkHealthChecks(ctx, networkHealthOptions{ + healthOptions := networkHealthOptions{ SystemType: systemType, Logger: logger, CommandTimeout: 3 * time.Second, @@ -409,7 +414,15 @@ func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, l ForceSSHRouteCheck: false, EnableDNSResolve: true, LocalPortChecks: defaultNetworkPortChecks(systemType), - }) + } + if suppressPVEChecks { + healthOptions.SystemType = SystemTypeUnknown + healthOptions.LocalPortChecks = nil + } + health := runNetworkHealthChecks(ctx, healthOptions) + if suppressPVEChecks { + health.add("PVE service checks", networkHealthOK, "skipped (cluster database restore in progress; services will be restarted after restore completes)") + } logNetworkHealthReport(logger, health) if diagnosticsDir != "" { if path, err := writeNetworkHealthReportFile(diagnosticsDir, health); err != nil { @@ -443,10 +456,52 @@ func applyNetworkWithRollbackWithUI(ctx context.Context, ui RestoreWorkflowUI, l } // Not committed: keep rollback ARMED. + notCommittedErr := buildNetworkApplyNotCommittedError(ctx, logger, iface, handle) if strings.TrimSpace(diagnosticsDir) != "" { - _ = ui.ShowMessage(ctx, "Network rollback armed", fmt.Sprintf("Network configuration not committed.\n\nRollback will run automatically.\n\nDiagnostics saved under:\n%s", diagnosticsDir)) + rollbackState := "Rollback is ARMED and will run automatically." + if notCommittedErr != nil && !notCommittedErr.RollbackArmed { + rollbackState = "Rollback has executed (or marker cleared)." 
+ } + + observed := "unknown" + original := "unknown" + if notCommittedErr != nil { + if v := strings.TrimSpace(notCommittedErr.RestoredIP); v != "" { + observed = v + } + if v := strings.TrimSpace(notCommittedErr.OriginalIP); v != "" { + original = v + } + } + + reconnectHost := "" + if original != "" && original != "unknown" { + reconnectHost = original + if i := strings.Index(reconnectHost, ","); i >= 0 { + reconnectHost = reconnectHost[:i] + } + if i := strings.Index(reconnectHost, "/"); i >= 0 { + reconnectHost = reconnectHost[:i] + } + reconnectHost = strings.TrimSpace(reconnectHost) + } + + var b strings.Builder + b.WriteString("Network configuration not committed.\n\n") + b.WriteString(rollbackState + "\n\n") + b.WriteString(fmt.Sprintf("IP now (after apply): %s\n", observed)) + if original != "unknown" { + b.WriteString(fmt.Sprintf("Expected after rollback: %s\n", original)) + } + if reconnectHost != "" && reconnectHost != "unknown" { + b.WriteString(fmt.Sprintf("Reconnect using: %s\n", reconnectHost)) + } + b.WriteString("\nDiagnostics saved under:\n") + b.WriteString(strings.TrimSpace(diagnosticsDir)) + + _ = ui.ShowMessage(ctx, "Network rollback", b.String()) } - return buildNetworkApplyNotCommittedError(ctx, logger, iface, handle) + return notCommittedErr } func (c *cliWorkflowUI) ConfirmAction(ctx context.Context, title, message, yesLabel, noLabel string, timeout time.Duration, defaultYes bool) (bool, error) { @@ -515,4 +570,3 @@ func (u *tuiWorkflowUI) PromptNetworkCommit(ctx context.Context, remaining time. } return committed, err } - diff --git a/internal/orchestrator/restore_workflow_ui.go b/internal/orchestrator/restore_workflow_ui.go index b07bc37..871ad9b 100644 --- a/internal/orchestrator/restore_workflow_ui.go +++ b/internal/orchestrator/restore_workflow_ui.go @@ -470,12 +470,12 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } if err := runSafeClusterApplyWithUI(ctx, ui, exportRoot, logger, plan); err != nil { - if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { - return err + if errors.Is(err, ErrRestoreAborted) || input.IsAborted(err) { + return err + } + restoreHadWarnings = true + logger.Warning("Cluster SAFE apply completed with errors: %v", err) } - restoreHadWarnings = true - logger.Warning("Cluster SAFE apply completed with errors: %v", err) - } } } @@ -615,12 +615,25 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l if errors.Is(err, ErrNetworkApplyNotCommitted) { var notCommitted *NetworkApplyNotCommittedError observedIP := "unknown" + originalIP := "unknown" + reconnectHost := "" rollbackLog := "" rollbackArmed := false if errors.As(err, ¬Committed) && notCommitted != nil { if strings.TrimSpace(notCommitted.RestoredIP) != "" { observedIP = strings.TrimSpace(notCommitted.RestoredIP) } + if strings.TrimSpace(notCommitted.OriginalIP) != "" { + originalIP = strings.TrimSpace(notCommitted.OriginalIP) + reconnectHost = originalIP + if i := strings.Index(reconnectHost, ","); i >= 0 { + reconnectHost = reconnectHost[:i] + } + if i := strings.Index(reconnectHost, "/"); i >= 0 { + reconnectHost = reconnectHost[:i] + } + reconnectHost = strings.TrimSpace(reconnectHost) + } rollbackLog = strings.TrimSpace(notCommitted.RollbackLog) rollbackArmed = notCommitted.RollbackArmed lastRestoreAbortInfo = &RestoreAbortInfo{ @@ -633,9 +646,16 @@ func runRestoreWorkflowWithUI(ctx context.Context, cfg *config.Config, logger *l } } if rollbackArmed { - logger.Warning("Network apply not committed; 
rollback is ARMED and will run automatically. Current IP: %s", observedIP) + logger.Warning("Network apply not committed; rollback is ARMED and will run automatically.") + } else { + logger.Warning("Network apply not committed; rollback has executed (or marker cleared).") + } + if reconnectHost != "" && reconnectHost != "unknown" && originalIP != "unknown" { + logger.Warning("IP now (after apply): %s. Expected after rollback: %s. Reconnect using: %s", observedIP, originalIP, reconnectHost) + } else if originalIP != "unknown" { + logger.Warning("IP now (after apply): %s. Expected after rollback: %s", observedIP, originalIP) } else { - logger.Warning("Network apply not committed; rollback has executed (or marker cleared). Current IP: %s", observedIP) + logger.Warning("IP now (after apply): %s", observedIP) } if rollbackLog != "" { logger.Info("Rollback log: %s", rollbackLog) From dd12858aecb377f650c2fc617cc432492ce19dd7 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 5 Feb 2026 18:36:47 +0100 Subject: [PATCH 7/9] Remove live countdowns from prompts Replace live-updating countdown UI in CLI and TUI prompts with a single-line deadline display and simplified input handling. Prompts now show a human-readable deadline (HH:MM:SS) and total seconds, and use ReadLineWithContext directly instead of a ticker + goroutine/select loop to avoid issues on terminals that don't handle repeated carriage-return updates (e.g. IPMI/serial). Logging and timeout/cancel handling preserved; TUI text updated to include the deadline. --- internal/orchestrator/.backup.lock | 4 +- internal/orchestrator/network_apply.go | 72 +++++++-------------- internal/orchestrator/prompts_cli.go | 90 +++++++++++--------------- internal/orchestrator/restore_tui.go | 8 ++- 4 files changed, 66 insertions(+), 108 deletions(-) diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index e077879..78a1e81 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=176534 +pid=192209 host=pve -time=2026-02-05T17:39:25+01:00 +time=2026-02-05T18:34:26+01:00 diff --git a/internal/orchestrator/network_apply.go b/internal/orchestrator/network_apply.go index 7f33fe3..dec3303 100644 --- a/internal/orchestrator/network_apply.go +++ b/internal/orchestrator/network_apply.go @@ -538,63 +538,35 @@ func promptNetworkCommitWithCountdown(ctx context.Context, reader *bufio.Reader, deadline := time.Now().Add(remaining) logging.DebugStep(logger, "prompt commit", "Deadline set: %s", deadline.Format(time.RFC3339)) + deadlineHHMMSS := deadline.Format("15:04:05") + timeoutSeconds := int(remaining.Seconds()) - fmt.Printf("Type COMMIT within %d seconds to keep the new network configuration.\n", int(remaining.Seconds())) + fmt.Printf("Type COMMIT within %d seconds (deadline: %s) to keep the new network configuration.\n", timeoutSeconds, deadlineHHMMSS) ctxTimeout, cancel := context.WithDeadline(ctx, deadline) defer cancel() - inputCh := make(chan string, 1) - errCh := make(chan error, 1) - - logging.DebugStep(logger, "prompt commit", "Starting input reader goroutine") - go func() { - line, err := input.ReadLineWithContext(ctxTimeout, reader) - if err != nil { - errCh <- err - return - } - inputCh <- line - }() - - ticker := time.NewTicker(1 * time.Second) - defer ticker.Stop() - - logging.DebugStep(logger, "prompt commit", "Waiting for user input...") - - for { - select { - case <-ticker.C: - left := time.Until(deadline) - if left < 0 { - left = 0 - } - fmt.Fprintf(os.Stderr, 
"\rRollback in %ds... Type COMMIT to keep: ", int(left.Seconds())) - if left <= 0 { - fmt.Fprintln(os.Stderr) - logging.DebugStep(logger, "prompt commit", "Timeout expired, returning DeadlineExceeded") - return false, context.DeadlineExceeded - } - case line := <-inputCh: - fmt.Fprintln(os.Stderr) - trimmedLine := strings.TrimSpace(line) - logging.DebugStep(logger, "prompt commit", "User input received: %q", trimmedLine) - if strings.EqualFold(trimmedLine, "commit") { - logging.DebugStep(logger, "prompt commit", "Result: COMMITTED") - return true, nil - } - logging.DebugStep(logger, "prompt commit", "Result: NOT COMMITTED (input was not 'commit')") - return false, nil - case err := <-errCh: - fmt.Fprintln(os.Stderr) - logging.DebugStep(logger, "prompt commit", "Input error received: %v", err) - if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { - logging.DebugStep(logger, "prompt commit", "Result: context deadline/canceled") - return false, err - } - logging.DebugStep(logger, "prompt commit", "Result: NOT COMMITTED (input error)") + fmt.Fprint(os.Stderr, "Type COMMIT to keep: ") + logging.DebugStep(logger, "prompt commit", "Waiting for user input (no live countdown)") + line, err := input.ReadLineWithContext(ctxTimeout, reader) + fmt.Fprintln(os.Stderr) + if err != nil { + logging.DebugStep(logger, "prompt commit", "Input error received: %v", err) + if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { + logging.DebugStep(logger, "prompt commit", "Result: context deadline/canceled") return false, err } + logging.DebugStep(logger, "prompt commit", "Result: NOT COMMITTED (input error)") + return false, err + } + + trimmedLine := strings.TrimSpace(line) + logging.DebugStep(logger, "prompt commit", "User input received: %q", trimmedLine) + if strings.EqualFold(trimmedLine, "commit") { + logging.DebugStep(logger, "prompt commit", "Result: COMMITTED") + return true, nil } + logging.DebugStep(logger, "prompt commit", "Result: NOT COMMITTED (input was not 'commit')") + return false, nil } func rollbackNetworkFilesNow(ctx context.Context, logger *logging.Logger, backupPath, workDir string) (logPath string, err error) { diff --git a/internal/orchestrator/prompts_cli.go b/internal/orchestrator/prompts_cli.go index 4f4c03a..022e9d0 100644 --- a/internal/orchestrator/prompts_cli.go +++ b/internal/orchestrator/prompts_cli.go @@ -70,63 +70,45 @@ func promptYesNoWithCountdown(ctx context.Context, reader *bufio.Reader, logger ctxTimeout, cancel := context.WithDeadline(ctx, deadline) defer cancel() - inputCh := make(chan string, 1) - errCh := make(chan error, 1) - - go func() { - line, err := input.ReadLineWithContext(ctxTimeout, reader) - if err != nil { - errCh <- err - return - } - inputCh <- line - }() + deadlineHHMMSS := deadline.Format("15:04:05") + timeoutSeconds := int(timeout.Seconds()) + defaultLabel := "No" + if defaultYes { + defaultLabel = "Yes" + } - ticker := time.NewTicker(1 * time.Second) - defer ticker.Stop() + // Print a single prompt line to avoid interfering with interactive input on + // terminals that don't handle repeated carriage-return updates well (e.g. IPMI/serial). + fmt.Fprintf(os.Stderr, "Auto-skip in %ds (at %s, default: %s)... %s %s ", timeoutSeconds, deadlineHHMMSS, defaultLabel, question, defStr) - for { - select { - case <-ticker.C: - left := time.Until(deadline) - if left < 0 { - left = 0 - } - fmt.Fprintf(os.Stderr, "\rAuto-skip in %ds... 
%s %s ", int(left.Seconds()), question, defStr) - if left <= 0 { - fmt.Fprintln(os.Stderr) - logging.DebugStep(logger, "prompt yes/no", "Timeout expired: proceeding with No") - logger.Info("No response within %ds; proceeding with No.", int(timeout.Seconds())) - return false, nil - } - case line := <-inputCh: - fmt.Fprintln(os.Stderr) - trimmed := strings.ToLower(strings.TrimSpace(line)) - logging.DebugStep(logger, "prompt yes/no", "User input received: %q", trimmed) - switch trimmed { - case "": - return defaultYes, nil - case "y", "yes": - return true, nil - case "n", "no": - return false, nil - default: - logger.Info("Unrecognized input %q; proceeding with No.", strings.TrimSpace(line)) - return false, nil - } - case err := <-errCh: - fmt.Fprintln(os.Stderr) - if errors.Is(err, context.DeadlineExceeded) { - logging.DebugStep(logger, "prompt yes/no", "Input timed out: proceeding with No") - logger.Info("No response within %ds; proceeding with No.", int(timeout.Seconds())) - return false, nil - } - if errors.Is(err, context.Canceled) { - logging.DebugStep(logger, "prompt yes/no", "Input canceled: %v", err) - return false, err - } - logging.DebugStep(logger, "prompt yes/no", "Input error: %v", err) + logging.DebugStep(logger, "prompt yes/no", "Waiting for user input (no live countdown)") + line, err := input.ReadLineWithContext(ctxTimeout, reader) + fmt.Fprintln(os.Stderr) + if err != nil { + if errors.Is(err, context.DeadlineExceeded) { + logging.DebugStep(logger, "prompt yes/no", "Input timed out: proceeding with No") + logger.Info("No response within %ds; proceeding with No.", int(timeout.Seconds())) + return false, nil + } + if errors.Is(err, context.Canceled) { + logging.DebugStep(logger, "prompt yes/no", "Input canceled: %v", err) return false, err } + logging.DebugStep(logger, "prompt yes/no", "Input error: %v", err) + return false, err + } + + trimmed := strings.ToLower(strings.TrimSpace(line)) + logging.DebugStep(logger, "prompt yes/no", "User input received: %q", trimmed) + switch trimmed { + case "": + return defaultYes, nil + case "y", "yes": + return true, nil + case "n", "no": + return false, nil + default: + logger.Info("Unrecognized input %q; proceeding with No.", strings.TrimSpace(line)) + return false, nil } } diff --git a/internal/orchestrator/restore_tui.go b/internal/orchestrator/restore_tui.go index bc263dc..be03115 100644 --- a/internal/orchestrator/restore_tui.go +++ b/internal/orchestrator/restore_tui.go @@ -683,12 +683,13 @@ func promptYesNoTUIWithCountdown(ctx context.Context, logger *logging.Logger, ti SetDynamicColors(true) deadline := time.Now().Add(timeout) + deadlineHHMMSS := deadline.Format("15:04:05") updateCountdown := func() { left := time.Until(deadline) if left < 0 { left = 0 } - countdownText.SetText(fmt.Sprintf("Auto-skip in %ds (default: %s)", int(left.Seconds()), noLabel)) + countdownText.SetText(fmt.Sprintf("Auto-skip in %ds (at %s, default: %s)", int(left.Seconds()), deadlineHHMMSS, noLabel)) } updateCountdown() @@ -795,6 +796,8 @@ func promptNetworkCommitTUI(timeout time.Duration, health networkHealthReport, n return false, nil } + deadlineHHMMSS := time.Now().Add(timeout).Format("15:04:05") + infoText := tview.NewTextView(). SetWrap(true). SetTextColor(tcell.ColorWhite). 
@@ -863,8 +866,9 @@ func promptNetworkCommitTUI(timeout time.Duration, health networkHealthReport, n diagInfo = fmt.Sprintf("\n\nDiagnostics saved under:\n%s", diagnosticsDir) } - infoText.SetText(fmt.Sprintf("Rollback in [yellow]%ds[white].\n\n%sNetwork health: [%s]%s[white]\n%s%s\n\nType COMMIT or press the button to keep the new network configuration.\nIf you do nothing, rollback will be automatic.", + infoText.SetText(fmt.Sprintf("Rollback in [yellow]%ds[white] (deadline: [yellow]%s[white]).\n\n%sNetwork health: [%s]%s[white]\n%s%s\n\nType COMMIT or press the button to keep the new network configuration.\nIf you do nothing, rollback will be automatic.", value, + deadlineHHMMSS, repairInfo, healthColor(health.Severity), health.Severity.String(), From 79f6847892d13d9cd5b630431f5637df3a7275d6 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 5 Feb 2026 20:39:23 +0100 Subject: [PATCH 8/9] Add recipient auto-detection for email notifier Implement automatic email recipient discovery for root@pam across Proxmox platforms. The Email notifier now tries pvesh (PVE API) first, falls back to legacy CLIs (pveum / proxmox-backup-manager), and finally parses user.cfg (/etc/pve/user.cfg or /etc/proxmox-backup/user.cfg). Added helper functions (redactEmail, runCombinedOutput, truncateForLog), new configurable paths and diagnostic limits, and improved debug logging with truncated diagnostics. Updated tests to cover pvesh and user.cfg fallbacks and adjusted existing tests. Documentation (CONFIGURATION and TROUBLESHOOTING) now describes auto-detection behavior and provides quick CLI checks. Minor update to internal/orchestrator/.backup.lock timestamp. --- docs/CONFIGURATION.md | 5 +- docs/TROUBLESHOOTING.md | 25 +++ internal/notify/email.go | 250 +++++++++++++++++++++++++---- internal/notify/email_test.go | 74 +++++++-- internal/orchestrator/.backup.lock | 4 +- 5 files changed, 312 insertions(+), 46 deletions(-) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 1c821fb..d09ead7 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -788,7 +788,10 @@ EMAIL_FROM=no-reply@proxmox.tis24.it - Allowed values for `EMAIL_DELIVERY_METHOD` are: `relay`, `sendmail`, `pmf` (invalid values will skip Email with a warning). - `EMAIL_FALLBACK_SENDMAIL` is a historical name (kept for compatibility). When `EMAIL_DELIVERY_METHOD=relay`, it enables fallback to **pmf** (it will not fall back to `/usr/sbin/sendmail`). - `relay` requires a real mailbox recipient and blocks `root@…` recipients; set `EMAIL_RECIPIENT` to a non-root mailbox if needed. -- `sendmail` requires a recipient and uses `/usr/sbin/sendmail`; ProxSave can auto-detect `root@pam` email from Proxmox if `EMAIL_RECIPIENT` is empty. +- If `EMAIL_RECIPIENT` is empty, ProxSave auto-detects the recipient from the `root@pam` user: + - **PVE**: Proxmox API via `pvesh get /access/users/root@pam` → fallback to `pveum user list` → fallback to `/etc/pve/user.cfg` + - **PBS**: `proxmox-backup-manager user list` → fallback to `/etc/proxmox-backup/user.cfg` +- `sendmail` requires a recipient and uses `/usr/sbin/sendmail` (auto-detect applies if `EMAIL_RECIPIENT` is empty, as described above). - With `pmf`, final delivery recipients are determined by Proxmox Notifications targets/matchers. `EMAIL_RECIPIENT` is only used for the `To:` header and may be empty. 
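A minimal, illustrative combination of these settings (values are placeholders; only the keys documented above are used):

```bash
# Sketch only; adapt recipient and sender to your environment.
EMAIL_DELIVERY_METHOD=relay
# On relay failure, fall back to pmf (not /usr/sbin/sendmail).
EMAIL_FALLBACK_SENDMAIL=true
# relay refuses root@... recipients; use a real mailbox.
EMAIL_RECIPIENT=backup-reports@example.com
EMAIL_FROM=no-reply@example.com
```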
### Gotify diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 373ccd7..91cdbdf 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -534,10 +534,34 @@ If Email is enabled but you don't see it being dispatched, ensure `EMAIL_DELIVER - Ensure the recipient is configured: - Set `EMAIL_RECIPIENT=...`, or - Leave it empty and set an email for `root@pam` inside Proxmox (auto-detect). +- Recipient auto-detection details (when `EMAIL_RECIPIENT` is empty): + - **PVE**: `pvesh get /access/users/root@pam` → fallback to `pveum user list` → fallback to `/etc/pve/user.cfg` + - **PBS**: `proxmox-backup-manager user list` → fallback to `/etc/proxmox-backup/user.cfg` - Relay blocks `root@…` recipients; use a real non-root mailbox for `EMAIL_RECIPIENT`. - If `EMAIL_FALLBACK_SENDMAIL=true`, ProxSave will fall back to `EMAIL_DELIVERY_METHOD=pmf` when the relay fails. - Check the proxsave logs for `email-relay` warnings/errors. +Quick checks for auto-detect: + +```bash +# Run only the relevant block for your platform (PVE vs PBS). + +# PVE (preferred: API via pvesh) +pvesh get /access/users/root@pam --output-format json + +# PVE (fallback: legacy CLI) +pveum user list --output-format json + +# PVE (last resort: config file) +grep -n '^user:root@pam:' /etc/pve/user.cfg + +# PBS (CLI) +proxmox-backup-manager user list --output-format json + +# PBS (config file) +grep -n '^user:root@pam:' /etc/proxmox-backup/user.cfg +``` + ##### If `EMAIL_DELIVERY_METHOD=sendmail` This mode uses `/usr/sbin/sendmail`, so your node must have a working local MTA (e.g. postfix). @@ -545,6 +569,7 @@ This mode uses `/usr/sbin/sendmail`, so your node must have a working local MTA - Ensure a recipient is available: - Set `EMAIL_RECIPIENT=...`, or - Leave it empty and set an email for `root@pam` inside Proxmox (auto-detect). +- If auto-detection fails, run the quick checks above and review proxsave debug logs (they include diagnostic output from failed Proxmox CLI/API calls). - Verify `sendmail` exists: ```bash test -x /usr/sbin/sendmail && echo "sendmail OK" || echo "sendmail not found" diff --git a/internal/notify/email.go b/internal/notify/email.go index f8beb1c..20a3afa 100644 --- a/internal/notify/email.go +++ b/internal/notify/email.go @@ -4,6 +4,7 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "os" "os/exec" @@ -84,6 +85,20 @@ var ( // postfixMainCFPath points to the Postfix main configuration file. // It is a variable to allow hermetic tests to override it. postfixMainCFPath = "/etc/postfix/main.cf" + + // pveUserCfgPath is the Proxmox VE user configuration file used as a last-resort fallback + // for recipient auto-detection when CLI/API tools are unavailable or broken. + // It is a variable to allow hermetic tests to override it. + pveUserCfgPath = "/etc/pve/user.cfg" + + // pbsUserCfgPath is the Proxmox Backup Server user configuration file used as a last-resort fallback + // for recipient auto-detection when CLI/API tools are unavailable or broken. + // It is a variable to allow hermetic tests to override it. + pbsUserCfgPath = "/etc/proxmox-backup/user.cfg" + + // recipientDetectMaxDiagBytes limits how much combined output is included in debug logs + // when a recipient auto-detection strategy fails. 
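+	// Output beyond this limit is truncated with a "...(truncated)" suffix (see truncateForLog).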
+ recipientDetectMaxDiagBytes = 2048 ) // NewEmailNotifier creates a new Email notifier @@ -179,7 +194,7 @@ func (e *EmailNotifier) Send(ctx context.Context, data *NotificationData) (*Noti } } else { recipient = detectedRecipient - e.logger.Debug("Auto-detected email recipient: %s", recipient) + e.logger.Debug("Auto-detected email recipient: %s", redactEmail(recipient)) autoDetected = true } } @@ -347,57 +362,232 @@ func isRootRecipient(recipient string) bool { return parts[0] == "root" } -// detectRecipient attempts to auto-detect the email recipient from Proxmox configuration -// Replicates Bash logic: jq -r '.[] | select(.userid=="root@pam") | .email' +func redactEmail(addr string) string { + trimmed := strings.TrimSpace(addr) + if trimmed == "" { + return "" + } + parts := strings.SplitN(trimmed, "@", 2) + if len(parts) != 2 { + return trimmed + } + local := parts[0] + domain := parts[1] + if local == "" { + return "***@" + domain + } + if len(local) == 1 { + return local + "***@" + domain + } + return local[:1] + "***@" + domain +} + +// detectRecipient attempts to auto-detect the email recipient from Proxmox configuration. +// Strategy order (professional default): +// 1. PVE: API via pvesh (/access/users/root@pam) to match the UI source of truth +// 2. Legacy CLI: pveum / proxmox-backup-manager user list +// 3. Last resort: parse user.cfg directly (PVE: /etc/pve/user.cfg, PBS: /etc/proxmox-backup/user.cfg) func (e *EmailNotifier) detectRecipient(ctx context.Context) (string, error) { - var cmd *exec.Cmd + const targetUserID = "root@pam" + e.logger.Debug("Recipient auto-detection: looking up email for %s (proxmox=%s)", targetUserID, e.proxmoxType) switch e.proxmoxType { case types.ProxmoxVE: - // Try to get root user email from PVE - cmd = exec.CommandContext(ctx, "pveum", "user", "list", "--output-format", "json") + return e.detectRecipientPVE(ctx, targetUserID) case types.ProxmoxBS: - // Try to get root user email from PBS - cmd = exec.CommandContext(ctx, "proxmox-backup-manager", "user", "list", "--output-format", "json") + return e.detectRecipientPBS(ctx, targetUserID) default: return "", fmt.Errorf("unknown Proxmox type: %s", e.proxmoxType) } +} - // Execute command - output, err := cmd.Output() +type proxmoxUserListEntry struct { + UserID string `json:"userid"` + Email string `json:"email"` +} + +func (e *EmailNotifier) detectRecipientPVE(ctx context.Context, targetUserID string) (string, error) { + var failures []string + + // 1) Primary: API via pvesh (matches Proxmox UI source of truth) + email, err := e.detectRecipientPVEViaPvesh(ctx, targetUserID) + if err == nil && strings.TrimSpace(email) != "" { + e.logger.Debug("Recipient auto-detection: resolved via pvesh API for %s", targetUserID) + return email, nil + } + if err != nil { + failures = append(failures, fmt.Sprintf("pvesh: %v", err)) + } + + // 2) Secondary: legacy CLI via pveum (kept for compatibility) + email, err = e.detectRecipientViaUserListCLI(ctx, "pveum", []string{"user", "list", "--output-format", "json"}, targetUserID) + if err == nil && strings.TrimSpace(email) != "" { + e.logger.Debug("Recipient auto-detection: resolved via pveum for %s", targetUserID) + return email, nil + } + if err != nil { + failures = append(failures, fmt.Sprintf("pveum: %v", err)) + } + + // 3) Last resort: parse /etc/pve/user.cfg directly (independent from Proxmox CLI tooling) + email, err = e.detectRecipientViaUserCfg(pveUserCfgPath, targetUserID) + if err == nil && strings.TrimSpace(email) != "" { + e.logger.Debug("Recipient 
auto-detection: resolved via user.cfg for %s (path=%s)", targetUserID, pveUserCfgPath) + return email, nil + } + if err != nil { + failures = append(failures, fmt.Sprintf("user.cfg: %v", err)) + } + + return "", fmt.Errorf("recipient auto-detection failed for %s (pve): %s", targetUserID, strings.Join(failures, "; ")) +} + +func (e *EmailNotifier) detectRecipientPBS(ctx context.Context, targetUserID string) (string, error) { + var failures []string + + // PBS does not ship pvesh; the management CLI is the supported on-node entrypoint. + email, err := e.detectRecipientViaUserListCLI(ctx, "proxmox-backup-manager", []string{"user", "list", "--output-format", "json"}, targetUserID) + if err == nil && strings.TrimSpace(email) != "" { + e.logger.Debug("Recipient auto-detection: resolved via proxmox-backup-manager for %s", targetUserID) + return email, nil + } + if err != nil { + failures = append(failures, fmt.Sprintf("proxmox-backup-manager: %v", err)) + } + + // Last resort: parse /etc/proxmox-backup/user.cfg directly. + email, err = e.detectRecipientViaUserCfg(pbsUserCfgPath, targetUserID) + if err == nil && strings.TrimSpace(email) != "" { + e.logger.Debug("Recipient auto-detection: resolved via user.cfg for %s (path=%s)", targetUserID, pbsUserCfgPath) + return email, nil + } + if err != nil { + failures = append(failures, fmt.Sprintf("user.cfg: %v", err)) + } + + return "", fmt.Errorf("recipient auto-detection failed for %s (pbs): %s", targetUserID, strings.Join(failures, "; ")) +} + +func (e *EmailNotifier) detectRecipientPVEViaPvesh(ctx context.Context, targetUserID string) (string, error) { + // Prefer the single-user endpoint to avoid parsing the full list and to match the UI/API source of truth. + endpoint := "/access/users/" + targetUserID + cmdName := "pvesh" + args := []string{"get", endpoint, "--output-format=json"} + + out, err := runCombinedOutput(ctx, cmdName, args...) if err != nil { - return "", fmt.Errorf("failed to query Proxmox user list: %w", err) + e.logger.Debug("Recipient auto-detection: pvesh call failed (cmd=%s %s): %v", cmdName, strings.Join(args, " "), err) + if diag := strings.TrimSpace(string(out)); diag != "" { + e.logger.Debug("Recipient auto-detection: pvesh diagnostic output: %s", truncateForLog(diag, recipientDetectMaxDiagBytes)) + } + return "", fmt.Errorf("pvesh API query failed: %w", err) } - // Parse JSON array to find root@pam user - // Replicates: jq -r '.[] | select(.userid=="root@pam") | .email' - var users []map[string]interface{} - if err := json.Unmarshal(output, &users); err != nil { - return "", fmt.Errorf("failed to parse user list JSON: %w", err) + var payload struct { + Email string `json:"email"` + } + if err := json.Unmarshal(out, &payload); err != nil { + e.logger.Debug("Recipient auto-detection: pvesh JSON parse failed for %s: %v", endpoint, err) + return "", fmt.Errorf("failed to parse pvesh JSON response: %w", err) } - // Search for root@pam user specifically - for _, user := range users { - userid, useridOk := user["userid"].(string) - if !useridOk { + email := strings.TrimSpace(payload.Email) + if email == "" { + return "", fmt.Errorf("%s exists but has no email configured (pvesh)", targetUserID) + } + e.logger.Debug("Recipient auto-detection: pvesh returned an email for %s: %s", targetUserID, redactEmail(email)) + return email, nil +} + +func (e *EmailNotifier) detectRecipientViaUserListCLI(ctx context.Context, cmdName string, args []string, targetUserID string) (string, error) { + out, err := runCombinedOutput(ctx, cmdName, args...) 
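+	// A non-nil err here covers both missing binaries (exec.ErrNotFound) and non-zero exit codes;
+	// any captured output is logged below as a truncated diagnostic.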
+ if err != nil { + // Command not found or non-zero exit status: include diagnostics in debug logs. + if errors.Is(err, exec.ErrNotFound) { + e.logger.Debug("Recipient auto-detection: %s not found in PATH (args=%s)", cmdName, strings.Join(args, " ")) + } else { + e.logger.Debug("Recipient auto-detection: %s command failed (args=%s): %v", cmdName, strings.Join(args, " "), err) + } + if diag := strings.TrimSpace(string(out)); diag != "" { + e.logger.Debug("Recipient auto-detection: %s diagnostic output: %s", cmdName, truncateForLog(diag, recipientDetectMaxDiagBytes)) + } + return "", fmt.Errorf("failed to query Proxmox user list via %s: %w", cmdName, err) + } + + var users []proxmoxUserListEntry + if err := json.Unmarshal(out, &users); err != nil { + return "", fmt.Errorf("failed to parse user list JSON from %s: %w", cmdName, err) + } + + for _, u := range users { + if strings.TrimSpace(u.UserID) != targetUserID { continue } + email := strings.TrimSpace(u.Email) + if email == "" { + return "", fmt.Errorf("%s user exists but has no email configured (%s)", targetUserID, cmdName) + } + e.logger.Debug("Recipient auto-detection: %s returned an email for %s: %s", cmdName, targetUserID, redactEmail(email)) + return email, nil + } - // Check if this is the root@pam user - if userid == "root@pam" { - email, emailOk := user["email"].(string) - if emailOk && email != "" { - e.logger.Debug("Found root@pam email: %s", email) - return email, nil - } - // root@pam found but no email configured - return "", fmt.Errorf("root@pam user exists but has no email configured") + return "", fmt.Errorf("%s user not found in Proxmox configuration (%s)", targetUserID, cmdName) +} + +func (e *EmailNotifier) detectRecipientViaUserCfg(cfgPath string, targetUserID string) (string, error) { + data, err := os.ReadFile(cfgPath) + if err != nil { + return "", fmt.Errorf("failed to read %s: %w", cfgPath, err) + } + + lines := strings.Split(string(data), "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + + // Expected format (PVE/PBS): user:::::::... + // Example: user:root@pam:1:0:::info@tis24.it:: + if !strings.HasPrefix(line, "user:") { + continue + } + parts := strings.Split(line, ":") + if len(parts) < 7 { + continue + } + userID := strings.TrimSpace(parts[1]) + if userID != targetUserID { + continue + } + + email := strings.TrimSpace(parts[6]) + if email == "" { + return "", fmt.Errorf("%s user exists but has no email configured (user.cfg)", targetUserID) } + e.logger.Debug("Recipient auto-detection: user.cfg contains an email for %s: %s", targetUserID, redactEmail(email)) + return email, nil + } + + return "", fmt.Errorf("%s user not found in %s", targetUserID, cfgPath) +} + +func runCombinedOutput(ctx context.Context, name string, args ...string) ([]byte, error) { + cmd := exec.CommandContext(ctx, name, args...) 
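+	// CombinedOutput merges stdout and stderr so a failing CLI still returns its error text to the caller.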
+ out, err := cmd.CombinedOutput() + if err != nil { + return out, err } + return out, nil +} - return "", fmt.Errorf("root@pam user not found in Proxmox configuration") +func truncateForLog(s string, maxBytes int) string { + if maxBytes <= 0 || len(s) <= maxBytes { + return s + } + return s[:maxBytes] + "...(truncated)" } // sendViaRelay sends email via cloud relay diff --git a/internal/notify/email_test.go b/internal/notify/email_test.go index c1d85a8..2f25449 100644 --- a/internal/notify/email_test.go +++ b/internal/notify/email_test.go @@ -79,19 +79,19 @@ func TestEmailNotifierBasicAccessors(t *testing.T) { } } - func TestDescribeEmailMethod(t *testing.T) { - tests := []struct { - method string - want string - }{ - {"email-relay", "cloud relay"}, - {"email-sendmail", "sendmail"}, - {"email-pmf", "proxmox-mail-forward"}, - {"email-pmf-fallback", "proxmox-mail-forward fallback"}, - {"custom", "custom"}, - } - for _, tt := range tests { - if got := describeEmailMethod(tt.method); got != tt.want { +func TestDescribeEmailMethod(t *testing.T) { + tests := []struct { + method string + want string + }{ + {"email-relay", "cloud relay"}, + {"email-sendmail", "sendmail"}, + {"email-pmf", "proxmox-mail-forward"}, + {"email-pmf-fallback", "proxmox-mail-forward fallback"}, + {"custom", "custom"}, + } + for _, tt := range tests { + if got := describeEmailMethod(tt.method); got != tt.want { t.Fatalf("describeEmailMethod(%s)=%s want %s", tt.method, got, tt.want) } } @@ -103,6 +103,8 @@ func TestEmailNotifierDetectRecipientPVE(t *testing.T) { if err != nil { t.Fatalf("NewEmailNotifier() error = %v", err) } + // Ensure we don't accidentally hit the real system pvesh during tests. + mockCmdEnv(t, "pvesh", "", 1) mockCmdEnv(t, "pveum", `[{"userid":"root@pam","email":"root@example.com"}]`, 0) recipient, err := notifier.detectRecipient(context.Background()) @@ -114,6 +116,52 @@ func TestEmailNotifierDetectRecipientPVE(t *testing.T) { } } +func TestEmailNotifierDetectRecipientPVEViaPvesh(t *testing.T) { + logger := logging.New(types.LogLevelDebug, false) + notifier, err := NewEmailNotifier(EmailConfig{}, types.ProxmoxVE, logger) + if err != nil { + t.Fatalf("NewEmailNotifier() error = %v", err) + } + mockCmdEnv(t, "pvesh", `{"email":"root@example.com"}`, 0) + + recipient, err := notifier.detectRecipient(context.Background()) + if err != nil { + t.Fatalf("detectRecipient() error = %v", err) + } + if recipient != "root@example.com" { + t.Fatalf("detectRecipient()=%s want root@example.com", recipient) + } +} + +func TestEmailNotifierDetectRecipientPVEViaUserCfgFallback(t *testing.T) { + logger := logging.New(types.LogLevelDebug, false) + notifier, err := NewEmailNotifier(EmailConfig{}, types.ProxmoxVE, logger) + if err != nil { + t.Fatalf("NewEmailNotifier() error = %v", err) + } + + origUserCfgPath := pveUserCfgPath + t.Cleanup(func() { pveUserCfgPath = origUserCfgPath }) + + tempDir := t.TempDir() + pveUserCfgPath = filepath.Join(tempDir, "user.cfg") + if err := os.WriteFile(pveUserCfgPath, []byte("user:root@pam:1:0:::root@example.com::\n"), 0o600); err != nil { + t.Fatalf("write user.cfg: %v", err) + } + + // Force both primary and secondary strategies to fail so the user.cfg fallback is exercised. 
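+	// Empty output with non-zero exit codes (1 and 2) simulates broken pvesh and pveum installs
+	// without touching the real system.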
+ mockCmdEnv(t, "pvesh", "", 1) + mockCmdEnv(t, "pveum", "", 2) + + recipient, err := notifier.detectRecipient(context.Background()) + if err != nil { + t.Fatalf("detectRecipient() error = %v", err) + } + if recipient != "root@example.com" { + t.Fatalf("detectRecipient()=%s want root@example.com", recipient) + } +} + func TestEmailNotifierDetectRecipientPBSNoEmail(t *testing.T) { logger := logging.New(types.LogLevelDebug, false) notifier, err := NewEmailNotifier(EmailConfig{}, types.ProxmoxBS, logger) diff --git a/internal/orchestrator/.backup.lock b/internal/orchestrator/.backup.lock index 78a1e81..1c8685a 100644 --- a/internal/orchestrator/.backup.lock +++ b/internal/orchestrator/.backup.lock @@ -1,3 +1,3 @@ -pid=192209 +pid=227499 host=pve -time=2026-02-05T18:34:26+01:00 +time=2026-02-05T20:31:58+01:00 From 38fa4020b9f95e5093a1e56ca99d336071ee9493 Mon Sep 17 00:00:00 2001 From: tis24dev Date: Thu, 5 Feb 2026 20:54:40 +0100 Subject: [PATCH 9/9] Bump Go toolchain to 1.25.7 Update the go.mod toolchain directive from go1.25.6 to go1.25.7 to align with the latest Go 1.25 patch release. --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 1886f12..b1deb12 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,7 @@ module github.com/tis24dev/proxsave go 1.25 -toolchain go1.25.6 +toolchain go1.25.7 require ( filippo.io/age v1.3.1